Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 5 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -126,10 +126,12 @@ Typical next steps are:

```bash
python -m dfode_kit.cli.main augment \
--source /path/to/run/oneD_flame_CH4_phi1/ch4_phi1_sample.h5 \
--mech /path/to/mechanisms/CH4/gri30.yaml \
--h5_file /path/to/run/oneD_flame_CH4_phi1/ch4_phi1_sample.h5 \
--output_file /path/to/data/ch4_phi1_aug.npy \
--dataset_num 20000
--save /path/to/data/ch4_phi1_aug.npy \
--preset random-local-combustion-v1 \
--target-size 20000 \
--apply

python -m dfode_kit.cli.main label \
--mech /path/to/mechanisms/CH4/gri30.yaml \
Expand Down
116 changes: 69 additions & 47 deletions dfode_kit/cli/commands/augment.py
Original file line number Diff line number Diff line change
@@ -1,62 +1,84 @@
def add_command_parser(subparsers):
augment_parser = subparsers.add_parser('augment', help='Perform data augmentation.')
from __future__ import annotations

augment_parser.add_argument(
'--mech',
required=True,
type=str,
help='Path to the YAML mechanism file.'
)
augment_parser.add_argument(
'--h5_file',
required=True,
type=str,
help='Path to the h5 file to augment.'
import json

from dfode_kit.cli.commands.augment_helpers import DEFAULT_AUGMENT_PRESET


def add_command_parser(subparsers):
augment_parser = subparsers.add_parser(
'augment',
help='Perform data augmentation from sampled state data.',
)
augment_parser.add_argument('--source', type=str, help='Path to the sampled HDF5 source file.')
augment_parser.add_argument('--mech', type=str, help='Path to the YAML mechanism file.')
augment_parser.add_argument('--save', type=str, help='Path to save the augmented NumPy array.')
augment_parser.add_argument(
'--output_file',
required=True,
'--preset',
type=str,
help='Path to the output NUMPY file.'
default=DEFAULT_AUGMENT_PRESET,
help='Named augmentation preset.',
)
augment_parser.add_argument(
'--heat_limit',
type=bool,
default=False,
help='contraint perturbed data with heat release.'
)
augment_parser.add_argument(
'--element_limit',
type=bool,
default=True,
help='contraint perturbed data with element ratio.'
)
augment_parser.add_argument(
'--dataset_num',
required=True,
'--target-size',
dest='target_size',
type=int,
help='num of dataset.'
)
augment_parser.add_argument(
'--perturb_factor',
type=float,
default=0.1,
help='Factor to perturb the data by.'
help='Requested number of augmented rows.',
)
augment_parser.add_argument('--seed', type=int, help='Random seed for reproducible augmentation.')
augment_parser.add_argument('--from-config', type=str, help='Load an augment plan/config JSON.')
augment_parser.add_argument('--write-config', type=str, help='Write the resolved augment plan/config to JSON.')
augment_parser.add_argument('--preview', action='store_true', help='Preview the resolved plan without executing augmentation.')
augment_parser.add_argument('--apply', action='store_true', help='Execute augmentation and write the output array.')
augment_parser.add_argument('--json', action='store_true', help='Print structured JSON output.')


def handle_command(args):
import numpy as np
from dfode_kit.cli.commands.augment_helpers import apply_augment_plan, dump_plan_json, resolve_augment_plan

if not args.preview and not args.apply and not args.write_config:
raise ValueError('Specify at least one action: --preview, --apply, or --write-config.')

plan = resolve_augment_plan(args)
json_result = {'command_type': 'augment'} if args.json else None

if args.write_config:
config_path = dump_plan_json(plan, args.write_config)
if args.json:
json_result['config_written'] = {'path': str(config_path)}
else:
print(f'Wrote augment config: {config_path}')

if args.preview:
if args.json:
json_result['plan'] = plan
else:
_print_human_plan(plan)

from dfode_kit.data import get_TPY_from_h5, random_perturb
if args.apply:
result = apply_augment_plan(plan, quiet=args.json)
if args.json:
json_result['apply'] = result
else:
print(f"Completed augmentation from: {result['source']}")
print(f"output_path: {result['output_path']}")
print(f"returned_count: {result['returned_count']}")

print('Handling augment command')
print(f'Loading data from h5 file: {args.h5_file}')
data = get_TPY_from_h5(args.h5_file)
print('Data shape:', data.shape)
if args.json:
print(json.dumps(json_result, indent=2, sort_keys=True))

all_data = random_perturb(data, args.mech, args.dataset_num, args.heat_limit, args.element_limit)

np.save(args.output_file, all_data)
print('Saved augmented data shape:', all_data.shape)
print(f'Saved augmented data to {args.output_file}')
def _print_human_plan(plan: dict):
print('Resolved augment plan')
print(f"preset: {plan['preset']}")
print(f"source: {plan['source']}")
print(f"mechanism: {plan['mechanism']}")
print(f"save: {plan['save']}")
print(f"target_size: {plan['target_size']}")
print(f"seed: {plan['seed']}")
print('resolved:')
for key in sorted(plan['resolved']):
print(f" {key}: {plan['resolved'][key]}")
print('notes:')
for note in plan['notes']:
print(f' - {note}')
162 changes: 162 additions & 0 deletions dfode_kit/cli/commands/augment_helpers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
from __future__ import annotations

import io
import json
from contextlib import nullcontext, redirect_stdout
from dataclasses import dataclass
from pathlib import Path
from typing import Any


# Preset applied when the CLI user does not pass --preset explicitly.
DEFAULT_AUGMENT_PRESET = 'random-local-combustion-v1'


@dataclass(frozen=True)
class AugmentPreset:
    """Immutable description of a named augmentation preset."""

    # Public preset identifier, as passed via the CLI's --preset flag.
    name: str
    # One-line human-readable description of the preset.
    summary: str
    # Concrete parameter values the preset resolves to (e.g. heat/element limits).
    resolved: dict[str, Any]
    # Free-form maintainer notes surfaced in plan previews and configs.
    notes: list[str]


# Registry of all presets selectable via --preset, keyed by preset name.
# Only the default preset exists today; its notes state it preserves the
# prior default augmentation behavior.
AUGMENT_PRESETS: dict[str, AugmentPreset] = {
    DEFAULT_AUGMENT_PRESET: AugmentPreset(
        name=DEFAULT_AUGMENT_PRESET,
        summary='Current random local perturbation workflow with combustion-oriented defaults.',
        resolved={
            'heat_limit': False,
            'element_limit': True,
        },
        notes=[
            'This preset preserves the current default augmentation behavior on main.',
            'The CLI intentionally keeps the public surface minimal; detailed tuning should happen through config round-trip or future preset revisions.',
        ],
    )
}


def get_augment_preset(name: str) -> AugmentPreset:
    """Look up a preset by name.

    Raises ValueError (chained from the KeyError) listing the available
    preset names when *name* is unknown.
    """
    try:
        preset = AUGMENT_PRESETS[name]
    except KeyError as err:
        available = ', '.join(sorted(AUGMENT_PRESETS))
        raise ValueError(
            f"Unknown augment preset: {name}. Available presets: {available}"
        ) from err
    return preset


def resolve_augment_plan(args) -> dict[str, Any]:
    """Resolve CLI arguments (and an optional JSON config) into an augment plan.

    CLI values take precedence over values loaded via ``--from-config``.
    Returns a plain dict (JSON-serializable) describing the full plan.

    Raises ValueError on an unknown preset, a config with the wrong
    ``command_type``, missing required values, or nonexistent input files.
    """
    if args.from_config:
        loaded = load_plan_json(args.from_config)
        if loaded.get('command_type') != 'augment':
            raise ValueError(f"Unsupported command_type in config: {loaded.get('command_type')}")

        # CLI overrides config; config fills in anything the CLI omitted.
        # NOTE(review): argparse gives --preset a non-None default, so a preset
        # stored in the config can never win here unless the CLI default is
        # falsy — confirm intended precedence before adding more presets.
        source = args.source or loaded.get('source')
        mech = args.mech or loaded.get('mechanism')
        save = args.save or loaded.get('save')
        preset_name = args.preset or loaded.get('preset', DEFAULT_AUGMENT_PRESET)
        target_size = args.target_size if args.target_size is not None else loaded.get('target_size')
        seed = args.seed if args.seed is not None else loaded.get('seed')

        # Fail with a clear message instead of a TypeError from Path(None)
        # or int(None) further down when the config is incomplete.
        missing = [label for label, value in (
            ('source', source),
            ('mechanism', mech),
            ('target_size', target_size),
        ) if value is None]
        if missing:
            raise ValueError(f'Config/CLI is missing required values: {", ".join(missing)}')
    else:
        _validate_required_args(args, ('source', 'mech', 'preset', 'target_size'))
        source = args.source
        mech = args.mech
        save = args.save
        preset_name = args.preset
        target_size = args.target_size
        seed = args.seed

    # --apply writes an output array, so a destination is mandatory.
    if args.apply and not save:
        raise ValueError('The --save path is required when using --apply.')

    preset = get_augment_preset(preset_name)
    source_path = Path(source).resolve()
    if not source_path.is_file():
        raise ValueError(f'Source file does not exist: {source_path}')

    mechanism_path = Path(mech).resolve()
    if not mechanism_path.is_file():
        raise ValueError(f'Mechanism file does not exist: {mechanism_path}')

    plan = {
        'schema_version': 1,
        'command_type': 'augment',
        'preset': preset.name,
        'preset_summary': preset.summary,
        'source': str(source_path),
        'mechanism': str(mechanism_path),
        'save': str(Path(save).resolve()) if save else None,
        'target_size': int(target_size),
        'seed': int(seed) if seed is not None else None,
        'config_path': str(Path(args.from_config).resolve()) if args.from_config else None,
        'notes': preset.notes,
        'resolved': dict(preset.resolved),
    }
    return plan


def apply_augment_plan(plan: dict[str, Any], quiet: bool = False) -> dict[str, Any]:
    """Execute a resolved augment plan and save the augmented array.

    Parameters
    ----------
    plan : dict produced by ``resolve_augment_plan`` (must have a 'save' path).
    quiet : when True, suppress all progress output, including prints emitted
        by the data helpers themselves.

    Returns a JSON-serializable summary of the completed augmentation.
    """
    import numpy as np

    from dfode_kit.data import get_TPY_from_h5, random_perturb

    source_path = Path(plan['source']).resolve()
    output_path = Path(plan['save']).resolve()
    output_path.parent.mkdir(parents=True, exist_ok=True)

    # Single code path for both modes: in quiet mode every print (ours and the
    # helpers') is swallowed by redirecting stdout; otherwise it passes through.
    sink = redirect_stdout(io.StringIO()) if quiet else nullcontext()
    with sink:
        print('Handling augment command')
        print(f'Loading data from h5 file: {source_path}')
        data = get_TPY_from_h5(source_path)
        print('Data shape:', data.shape)
        augmented = random_perturb(
            data,
            plan['mechanism'],
            plan['target_size'],
            plan['resolved']['heat_limit'],
            plan['resolved']['element_limit'],
            seed=plan.get('seed'),
        )

    # Save BEFORE announcing success, so the messages cannot mislead when
    # np.save raises (the original printed "Saved ..." first).
    np.save(output_path, augmented)
    if not quiet:
        print('Saved augmented data shape:', augmented.shape)
        print(f'Saved augmented data to {output_path}')

    return {
        'event': 'augmentation_completed',
        'source': str(source_path),
        'output_path': str(output_path),
        'preset': plan['preset'],
        'requested_count': int(plan['target_size']),
        'returned_count': int(augmented.shape[0]),
        'feature_count': int(augmented.shape[1]) if augmented.ndim == 2 else None,
        'seed': plan.get('seed'),
    }


def dump_plan_json(plan: dict[str, Any], path: str | Path) -> Path:
    """Write *plan* as stable (sorted, indented) JSON to *path*.

    Parent directories are created as needed. Returns the resolved path.
    """
    target = Path(path).resolve()
    target.parent.mkdir(parents=True, exist_ok=True)
    serialized = json.dumps(plan, indent=2, sort_keys=True)
    target.write_text(serialized + '\n', encoding='utf-8')
    return target


def load_plan_json(path: str | Path) -> dict[str, Any]:
    """Read *path* as UTF-8 text and return the parsed JSON object."""
    config_path = Path(path).resolve()
    text = config_path.read_text(encoding='utf-8')
    return json.loads(text)


def _validate_required_args(args, names: tuple[str, ...]):
missing = [f'--{name.replace("_", "-")}' for name in names if getattr(args, name) is None]
if missing:
raise ValueError(f'Missing required arguments: {", ".join(missing)}')
13 changes: 8 additions & 5 deletions dfode_kit/data/augment.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,12 @@ def random_perturb(
cq: float = 10,
inert_idx: int = -1,
time_step: float = 1e-6,
seed: int | None = None,
) -> np.ndarray:
import cantera as ct

rng = np.random.default_rng(seed)

array = array[array[:, 0] > frozenTem]

gas = ct.Solution(mech_path)
Expand Down Expand Up @@ -78,11 +81,11 @@ def random_perturb(

test_r = np.copy(array[j])

test_tmp[0] = test_r[0] + (maxT - minT) * (2 * np.random.rand() - 1.0) * alpha
test_tmp[1] = test_r[1] + (maxP - minP) * (2 * np.random.rand() - 1.0) * alpha * 20
test_tmp[-1] = test_r[-1] + (maxN2 - minN2) * (2 * np.random.rand() - 1) * alpha
test_tmp[0] = test_r[0] + (maxT - minT) * (2 * rng.random() - 1.0) * alpha
test_tmp[1] = test_r[1] + (maxP - minP) * (2 * rng.random() - 1.0) * alpha * 20
test_tmp[-1] = test_r[-1] + (maxN2 - minN2) * (2 * rng.random() - 1) * alpha
for i in range(2, array.shape[1] - 1):
test_tmp[i] = np.abs(test_r[i]) ** (1 + (2 * np.random.rand() - 1) * alpha)
test_tmp[i] = np.abs(test_r[i]) ** (1 + (2 * rng.random() - 1) * alpha)
test_tmp[2:-1] = test_tmp[2:-1] / np.sum(test_tmp[2:-1]) * (1 - test_tmp[-1])

if heat_limit:
Expand Down Expand Up @@ -133,7 +136,7 @@ def random_perturb(
print(num)

new_array = np.array(new_array)
new_array = new_array[np.random.choice(new_array.shape[0], size=dataset)]
new_array = new_array[rng.choice(new_array.shape[0], size=dataset)]
unique_array = np.unique(new_array, axis=0)
print(unique_array.shape)
return unique_array
Expand Down
Loading
Loading