Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 5 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -126,10 +126,12 @@ Typical next steps are:

```bash
python -m dfode_kit.cli.main augment \
--source /path/to/run/oneD_flame_CH4_phi1/ch4_phi1_sample.h5 \
--mech /path/to/mechanisms/CH4/gri30.yaml \
--h5_file /path/to/run/oneD_flame_CH4_phi1/ch4_phi1_sample.h5 \
--output_file /path/to/data/ch4_phi1_aug.npy \
--dataset_num 20000
--save /path/to/data/ch4_phi1_aug.npy \
--preset random-local-combustion-v1 \
--target-size 20000 \
--apply

python -m dfode_kit.cli.main label \
--mech /path/to/mechanisms/CH4/gri30.yaml \
Expand Down
116 changes: 69 additions & 47 deletions dfode_kit/cli/commands/augment.py
Original file line number Diff line number Diff line change
@@ -1,62 +1,84 @@
def add_command_parser(subparsers):
augment_parser = subparsers.add_parser('augment', help='Perform data augmentation.')
from __future__ import annotations

augment_parser.add_argument(
'--mech',
required=True,
type=str,
help='Path to the YAML mechanism file.'
)
augment_parser.add_argument(
'--h5_file',
required=True,
type=str,
help='Path to the h5 file to augment.'
import json

from dfode_kit.cli.commands.augment_helpers import DEFAULT_AUGMENT_PRESET


def add_command_parser(subparsers):
augment_parser = subparsers.add_parser(
'augment',
help='Perform data augmentation from sampled state data.',
)
augment_parser.add_argument('--source', type=str, help='Path to the sampled HDF5 source file.')
augment_parser.add_argument('--mech', type=str, help='Path to the YAML mechanism file.')
augment_parser.add_argument('--save', type=str, help='Path to save the augmented NumPy array.')
augment_parser.add_argument(
'--output_file',
required=True,
'--preset',
type=str,
help='Path to the output NUMPY file.'
default=DEFAULT_AUGMENT_PRESET,
help='Named augmentation preset.',
)
augment_parser.add_argument(
'--heat_limit',
type=bool,
default=False,
help='contraint perturbed data with heat release.'
)
augment_parser.add_argument(
'--element_limit',
type=bool,
default=True,
help='contraint perturbed data with element ratio.'
)
augment_parser.add_argument(
'--dataset_num',
required=True,
'--target-size',
dest='target_size',
type=int,
help='num of dataset.'
)
augment_parser.add_argument(
'--perturb_factor',
type=float,
default=0.1,
help='Factor to perturb the data by.'
help='Requested number of augmented rows.',
)
augment_parser.add_argument('--seed', type=int, help='Random seed for reproducible augmentation.')
augment_parser.add_argument('--from-config', type=str, help='Load an augment plan/config JSON.')
augment_parser.add_argument('--write-config', type=str, help='Write the resolved augment plan/config to JSON.')
augment_parser.add_argument('--preview', action='store_true', help='Preview the resolved plan without executing augmentation.')
augment_parser.add_argument('--apply', action='store_true', help='Execute augmentation and write the output array.')
augment_parser.add_argument('--json', action='store_true', help='Print structured JSON output.')


def handle_command(args):
import numpy as np
from dfode_kit.cli.commands.augment_helpers import apply_augment_plan, dump_plan_json, resolve_augment_plan

if not args.preview and not args.apply and not args.write_config:
raise ValueError('Specify at least one action: --preview, --apply, or --write-config.')

plan = resolve_augment_plan(args)
json_result = {'command_type': 'augment'} if args.json else None

if args.write_config:
config_path = dump_plan_json(plan, args.write_config)
if args.json:
json_result['config_written'] = {'path': str(config_path)}
else:
print(f'Wrote augment config: {config_path}')

if args.preview:
if args.json:
json_result['plan'] = plan
else:
_print_human_plan(plan)

from dfode_kit.data import get_TPY_from_h5, random_perturb
if args.apply:
result = apply_augment_plan(plan, quiet=args.json)
if args.json:
json_result['apply'] = result
else:
print(f"Completed augmentation from: {result['source']}")
print(f"output_path: {result['output_path']}")
print(f"returned_count: {result['returned_count']}")

print('Handling augment command')
print(f'Loading data from h5 file: {args.h5_file}')
data = get_TPY_from_h5(args.h5_file)
print('Data shape:', data.shape)
if args.json:
print(json.dumps(json_result, indent=2, sort_keys=True))

all_data = random_perturb(data, args.mech, args.dataset_num, args.heat_limit, args.element_limit)

np.save(args.output_file, all_data)
print('Saved augmented data shape:', all_data.shape)
print(f'Saved augmented data to {args.output_file}')
def _print_human_plan(plan: dict):
print('Resolved augment plan')
print(f"preset: {plan['preset']}")
print(f"source: {plan['source']}")
print(f"mechanism: {plan['mechanism']}")
print(f"save: {plan['save']}")
print(f"target_size: {plan['target_size']}")
print(f"seed: {plan['seed']}")
print('resolved:')
for key in sorted(plan['resolved']):
print(f" {key}: {plan['resolved'][key]}")
print('notes:')
for note in plan['notes']:
print(f' - {note}')
162 changes: 162 additions & 0 deletions dfode_kit/cli/commands/augment_helpers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
from __future__ import annotations

import io
import json
from contextlib import nullcontext, redirect_stdout
from dataclasses import dataclass
from pathlib import Path
from typing import Any


# Preset applied when the CLI user does not pass --preset explicitly.
DEFAULT_AUGMENT_PRESET = 'random-local-combustion-v1'


@dataclass(frozen=True)
class AugmentPreset:
    """Immutable description of a named augmentation preset."""

    # Public preset identifier, as passed via the CLI's --preset flag.
    name: str
    # One-line human-readable description of the preset.
    summary: str
    # Concrete parameter values the preset resolves to (e.g. heat/element limits).
    resolved: dict[str, Any]
    # Free-form maintainer notes surfaced in plan previews and configs.
    notes: list[str]


# Registry of all presets selectable via --preset, keyed by preset name.
# Only the default preset exists today; its notes state it preserves the
# prior default augmentation behavior.
AUGMENT_PRESETS: dict[str, AugmentPreset] = {
    DEFAULT_AUGMENT_PRESET: AugmentPreset(
        name=DEFAULT_AUGMENT_PRESET,
        summary='Current random local perturbation workflow with combustion-oriented defaults.',
        resolved={
            'heat_limit': False,
            'element_limit': True,
        },
        notes=[
            'This preset preserves the current default augmentation behavior on main.',
            'The CLI intentionally keeps the public surface minimal; detailed tuning should happen through config round-trip or future preset revisions.',
        ],
    )
}


def get_augment_preset(name: str) -> AugmentPreset:
    """Look up a preset by name.

    Raises ValueError (chained from the KeyError) listing the available
    preset names when *name* is unknown.
    """
    try:
        preset = AUGMENT_PRESETS[name]
    except KeyError as err:
        available = ', '.join(sorted(AUGMENT_PRESETS))
        raise ValueError(
            f"Unknown augment preset: {name}. Available presets: {available}"
        ) from err
    return preset


def resolve_augment_plan(args) -> dict[str, Any]:
    """Resolve CLI arguments (and an optional JSON config) into an augment plan.

    CLI values take precedence over values loaded via ``--from-config``.
    Returns a plain dict (JSON-serializable) describing the full plan.

    Raises ValueError on an unknown preset, a config with the wrong
    ``command_type``, missing required values, or nonexistent input files.
    """
    if args.from_config:
        loaded = load_plan_json(args.from_config)
        if loaded.get('command_type') != 'augment':
            raise ValueError(f"Unsupported command_type in config: {loaded.get('command_type')}")

        # CLI overrides config; config fills in anything the CLI omitted.
        # NOTE(review): argparse gives --preset a non-None default, so a preset
        # stored in the config can never win here unless the CLI default is
        # falsy — confirm intended precedence before adding more presets.
        source = args.source or loaded.get('source')
        mech = args.mech or loaded.get('mechanism')
        save = args.save or loaded.get('save')
        preset_name = args.preset or loaded.get('preset', DEFAULT_AUGMENT_PRESET)
        target_size = args.target_size if args.target_size is not None else loaded.get('target_size')
        seed = args.seed if args.seed is not None else loaded.get('seed')

        # Fail with a clear message instead of a TypeError from Path(None)
        # or int(None) further down when the config is incomplete.
        missing = [label for label, value in (
            ('source', source),
            ('mechanism', mech),
            ('target_size', target_size),
        ) if value is None]
        if missing:
            raise ValueError(f'Config/CLI is missing required values: {", ".join(missing)}')
    else:
        _validate_required_args(args, ('source', 'mech', 'preset', 'target_size'))
        source = args.source
        mech = args.mech
        save = args.save
        preset_name = args.preset
        target_size = args.target_size
        seed = args.seed

    # --apply writes an output array, so a destination is mandatory.
    if args.apply and not save:
        raise ValueError('The --save path is required when using --apply.')

    preset = get_augment_preset(preset_name)
    source_path = Path(source).resolve()
    if not source_path.is_file():
        raise ValueError(f'Source file does not exist: {source_path}')

    mechanism_path = Path(mech).resolve()
    if not mechanism_path.is_file():
        raise ValueError(f'Mechanism file does not exist: {mechanism_path}')

    plan = {
        'schema_version': 1,
        'command_type': 'augment',
        'preset': preset.name,
        'preset_summary': preset.summary,
        'source': str(source_path),
        'mechanism': str(mechanism_path),
        'save': str(Path(save).resolve()) if save else None,
        'target_size': int(target_size),
        'seed': int(seed) if seed is not None else None,
        'config_path': str(Path(args.from_config).resolve()) if args.from_config else None,
        'notes': preset.notes,
        'resolved': dict(preset.resolved),
    }
    return plan


def apply_augment_plan(plan: dict[str, Any], quiet: bool = False) -> dict[str, Any]:
    """Execute a resolved augment plan and save the augmented array.

    Parameters
    ----------
    plan : dict produced by ``resolve_augment_plan`` (must have a 'save' path).
    quiet : when True, suppress all progress output, including prints emitted
        by the data helpers themselves.

    Returns a JSON-serializable summary of the completed augmentation.
    """
    import numpy as np

    from dfode_kit.data import get_TPY_from_h5, random_perturb

    source_path = Path(plan['source']).resolve()
    output_path = Path(plan['save']).resolve()
    output_path.parent.mkdir(parents=True, exist_ok=True)

    # Single code path for both modes: in quiet mode every print (ours and the
    # helpers') is swallowed by redirecting stdout; otherwise it passes through.
    sink = redirect_stdout(io.StringIO()) if quiet else nullcontext()
    with sink:
        print('Handling augment command')
        print(f'Loading data from h5 file: {source_path}')
        data = get_TPY_from_h5(source_path)
        print('Data shape:', data.shape)
        augmented = random_perturb(
            data,
            plan['mechanism'],
            plan['target_size'],
            plan['resolved']['heat_limit'],
            plan['resolved']['element_limit'],
            seed=plan.get('seed'),
        )

    # Save BEFORE announcing success, so the messages cannot mislead when
    # np.save raises (the original printed "Saved ..." first).
    np.save(output_path, augmented)
    if not quiet:
        print('Saved augmented data shape:', augmented.shape)
        print(f'Saved augmented data to {output_path}')

    return {
        'event': 'augmentation_completed',
        'source': str(source_path),
        'output_path': str(output_path),
        'preset': plan['preset'],
        'requested_count': int(plan['target_size']),
        'returned_count': int(augmented.shape[0]),
        'feature_count': int(augmented.shape[1]) if augmented.ndim == 2 else None,
        'seed': plan.get('seed'),
    }


def dump_plan_json(plan: dict[str, Any], path: str | Path) -> Path:
    """Write *plan* as stable (sorted, indented) JSON to *path*.

    Parent directories are created as needed. Returns the resolved path.
    """
    target = Path(path).resolve()
    target.parent.mkdir(parents=True, exist_ok=True)
    serialized = json.dumps(plan, indent=2, sort_keys=True)
    target.write_text(serialized + '\n', encoding='utf-8')
    return target


def load_plan_json(path: str | Path) -> dict[str, Any]:
    """Read *path* as UTF-8 text and return the parsed JSON object."""
    config_path = Path(path).resolve()
    text = config_path.read_text(encoding='utf-8')
    return json.loads(text)


def _validate_required_args(args, names: tuple[str, ...]):
missing = [f'--{name.replace("_", "-")}' for name in names if getattr(args, name) is None]
if missing:
raise ValueError(f'Missing required arguments: {", ".join(missing)}')
13 changes: 8 additions & 5 deletions dfode_kit/data/augment.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,12 @@ def random_perturb(
cq: float = 10,
inert_idx: int = -1,
time_step: float = 1e-6,
seed: int | None = None,
) -> np.ndarray:
import cantera as ct

rng = np.random.default_rng(seed)

array = array[array[:, 0] > frozenTem]

gas = ct.Solution(mech_path)
Expand Down Expand Up @@ -78,11 +81,11 @@ def random_perturb(

test_r = np.copy(array[j])

test_tmp[0] = test_r[0] + (maxT - minT) * (2 * np.random.rand() - 1.0) * alpha
test_tmp[1] = test_r[1] + (maxP - minP) * (2 * np.random.rand() - 1.0) * alpha * 20
test_tmp[-1] = test_r[-1] + (maxN2 - minN2) * (2 * np.random.rand() - 1) * alpha
test_tmp[0] = test_r[0] + (maxT - minT) * (2 * rng.random() - 1.0) * alpha
test_tmp[1] = test_r[1] + (maxP - minP) * (2 * rng.random() - 1.0) * alpha * 20
test_tmp[-1] = test_r[-1] + (maxN2 - minN2) * (2 * rng.random() - 1) * alpha
for i in range(2, array.shape[1] - 1):
test_tmp[i] = np.abs(test_r[i]) ** (1 + (2 * np.random.rand() - 1) * alpha)
test_tmp[i] = np.abs(test_r[i]) ** (1 + (2 * rng.random() - 1) * alpha)
test_tmp[2:-1] = test_tmp[2:-1] / np.sum(test_tmp[2:-1]) * (1 - test_tmp[-1])

if heat_limit:
Expand Down Expand Up @@ -133,7 +136,7 @@ def random_perturb(
print(num)

new_array = np.array(new_array)
new_array = new_array[np.random.choice(new_array.shape[0], size=dataset)]
new_array = new_array[rng.choice(new_array.shape[0], size=dataset)]
unique_array = np.unique(new_array, axis=0)
print(unique_array.shape)
return unique_array
Expand Down
Loading
Loading