diff --git a/.gitignore b/.gitignore index 0733b62..021ce59 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ logs/ -results/ \ No newline at end of file +prj/ +__pycache__/ \ No newline at end of file diff --git a/README.md b/README.md index ecda004..4c722aa 100644 --- a/README.md +++ b/README.md @@ -28,7 +28,7 @@ An orchestration framework for co-optimizing neural network models on hardware p Once it is created: ```bash - conda activate nauticml + conda activate nauticML ``` 3. **Start Prefect and PostgreSQL services** diff --git a/config.yaml b/config.yaml deleted file mode 100644 index 2057492..0000000 --- a/config.yaml +++ /dev/null @@ -1,103 +0,0 @@ ---- -experiment: - seed: 42 - save_dir: ./results - save_model: model - gpus: [0] - ckpt_file: best_chkp.tf - -dataset: - name: mnist - data: - -model: - original: - logic: - name: lenet - is_quant: false - dropout_rate: 0.4 - p_rate: 0.05 - scale_factor: 0.3 - dropout_type: mc - num_bayes_layer: 3 - - -bayes_opt: - iteration: - terminate: - engine: - score: - summary: - control: - params: dict - metrics: dict - suggests: dict - - num_iter: 3 - seed: ((experiment.seed)) - tunable: - dropout_rate: - value: ((model.dropout_rate)) - space: [0.1, 0.2, 0.3, 0.4] - p_rate: - value: ((model.p_rate)) - space: [0.0, 0.1, 0.2] - num_bayes_layer: - value: ((model.num_bayes_layer)) - space: [1, 2, 3] - scale_factor: - value: ((model.scale_factor)) - space: [0.5, 1.0, 1.5] - - metrics: - accuracy: ((eval.accuracy)) - ece: ((eval.ece)) - aPE: ((eval.ape)) - FLOP: ((eval.flop)) - - score_weights: - accuracy: - weight: 0.25 - base: 0.99 - ece: - weight: 0.25 - base: -0.03 - aPE: - weight: 0.25 - base: 1.5 - FLOP: - weight: 0.25 - base: -5340192 - - -train: - optimizer: - id: ((bayes_opt.iteration)) - num_epoch: 3 - batch_size: 128 - learning_rate: 0.01 - validation_split: 0.1 - -eval: - ece: - ape: - accuracy: - flop: - - mc_samples: 5 - num_eval_images: 200 - num_bins: 10 - - -reporter: - log: - - bayes_opt.iteration - - dropout_rate_list - - p_rate - - num_bayes_layer - - scale_factor - - accuracy - - flops - - ece - - ape - - score diff --git a/config/config_lenet.yaml b/config/config_lenet.yaml new file mode 100644 index 0000000..9306912 --- /dev/null +++ b/config/config_lenet.yaml @@ -0,0 +1,145 @@ +--- + +experiment: + seed: 42 + save_dir: ./prj/exp_mnist_bayes_lenet_try + save_model: mnist_bayes_lenet_10samples_mc + gpus: [0] + ckpt_file: best_chkp.tf + +dataset: + name: mnist + data: + mean: 0.0 + std: 1.0 + +model: + original: + logic: + name: lenet + is_quant: false + dropout_rate: 0.4 + p_rate: 0.05 + scale_factor: 0.3 + dropout_type: mc + num_bayes_layer: 3 + +# NOTE: introduce typing so that we can type dictionaries too +S1: + accuracy: + weight: 0.25 + base: 0.99 + ece: + weight: -0.25 + base: 0.03 + ape: + weight: 0.25 + base: 1.5 + flops: + weight: -0.25 + base: 5340192 + + top_n: 3 + # special metric calculated from the other metrics + top_metric: score + + name: Balance + +S2: + accuracy: + weight: 0.75 + base: 0.99 + ece: + weight: 0.00 + base: 0.03 + ape: + weight: 0.00 + base: 1.5 + flops: + weight: -0.25 + base: 5340192 + + top_n: 1 + top_metric: flops + + name: Opt-Efficiency + +strategy: + strategies: [ S1 ] + curr_strategy: + curr_results: ((bayes_opt.summary)) + + terminate_strategies: + results: + save_dir: ((experiment.save_dir)) + + +bayes_opt: + iteration: + terminate: + engine: + score: + summary: + + curr_strategy: + + control: + params: dict + suggests: dict + + num_iter: 2 + seed: ((experiment.seed)) + tunable: + dropout_rate: + value: ((model.dropout_rate)) + space: [0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45, 0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.80, 0.85, 0.90, 0.95] + # space: [0.1, 0.2, 0.3, 0.4] + p_rate: + value: ((model.p_rate)) + space: [0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45, 0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.80, 0.85, 0.90, 0.95] + # space: [0.0, 0.1, 0.2] + num_bayes_layer: + value: ((model.num_bayes_layer)) + space: [1, 2, 3] + scale_factor: + value: ((model.scale_factor)) + # space: [0.5, 1.0, 1.5] + space: [0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45, 0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.80, 0.85, 0.90, 0.95] + + metrics: + accuracy: ((eval.accuracy)) + ece: ((eval.ece)) + ape: ((eval.ape)) + flops: ((eval.flops)) + +train: + optimizer: + id: ((bayes_opt.iteration)) + num_epoch: 3 + batch_size: 128 + learning_rate: 0.01 + validation_split: 0.1 + +eval: + ece: + ape: + accuracy: + flops: + + mc_samples: 5 + num_eval_images: 200 + num_bins: 10 + + +reporter: + log: + - bayes_opt.iteration + - dropout_rate_list + - p_rate + - num_bayes_layer + - scale_factor + - accuracy + - flops + - ece + - ape + - score diff --git a/config/config_resnet.yaml b/config/config_resnet.yaml new file mode 100644 index 0000000..cbae8be --- /dev/null +++ b/config/config_resnet.yaml @@ -0,0 +1,145 @@ +--- + +experiment: + seed: 42 + save_dir: ./prj/exp_cifar_bayes_resnet + save_model: cifar_resnet_tmp_3bayeslayer_mc + gpus: [0, 1] + ckpt_file: best_chkp.tf + +dataset: + name: cifar10 + data: + mean: 0.0 + std: 1.0 + +model: + original: + logic: + name: resnet + is_quant: false + dropout_rate: 0.4 + p_rate: 0.05 + scale_factor: 0.3 + dropout_type: mc + num_bayes_layer: 3 + +# NOTE: introduce typing so that we can type dictionaries too +S1: + accuracy: + weight: 0.25 + base: 0.99 + ece: + weight: -0.25 + base: 0.03 + ape: + weight: 0.25 + base: 1.5 + flops: + weight: -0.25 + base: 5340192 + + top_n: 3 + # special metric calculated from the other metrics + top_metric: score + + name: Balance + +S2: + accuracy: + weight: 0.75 + base: 0.99 + ece: + weight: 0.00 + base: 0.03 + ape: + weight: 0.00 + base: 1.5 + flops: + weight: -0.25 + base: 5340192 + + top_n: 1 + top_metric: flops + + name: Opt-Efficiency + +strategy: + strategies: [ S1 ] + curr_strategy: + curr_results: ((bayes_opt.summary)) + + terminate_strategies: + results: + save_dir: ((experiment.save_dir)) + + +bayes_opt: + iteration: + terminate: + engine: + score: + summary: + + curr_strategy: + + control: + params: dict + suggests: dict + + num_iter: 2 + seed: ((experiment.seed)) + tunable: + dropout_rate: + value: ((model.dropout_rate)) + space: [0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45, 0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.80, 0.85, 0.90, 0.95] + # space: [0.1, 0.2, 0.3, 0.4] + p_rate: + value: ((model.p_rate)) + space: [0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45, 0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.80, 0.85, 0.90, 0.95] + # space: [0.0, 0.1, 0.2] + num_bayes_layer: + value: ((model.num_bayes_layer)) + space: [1, 2, 3] + scale_factor: + value: ((model.scale_factor)) + # space: [0.5, 1.0, 1.5] + space: [0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45, 0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.80, 0.85, 0.90, 0.95] + + metrics: + accuracy: ((eval.accuracy)) + ece: ((eval.ece)) + ape: ((eval.ape)) + flops: ((eval.flops)) + +train: + optimizer: + id: ((bayes_opt.iteration)) + num_epoch: 3 + batch_size: 128 + learning_rate: 0.01 + validation_split: 0.1 + +eval: + ece: + ape: + accuracy: + flops: + + mc_samples: 10 + num_eval_images: 200 + num_bins: 10 + + +reporter: + log: + - bayes_opt.iteration + - dropout_rate_list + - p_rate + - num_bayes_layer + - scale_factor + - accuracy + - flops + - ece + - ape + - score diff --git a/environment.yml b/environment.yml index 930a098..fdb5f1e 100644 --- a/environment.yml +++ b/environment.yml @@ -10,6 +10,9 @@ dependencies: - tensorflow=2.12 - nvidia/label/cuda-11.8.0::cuda-nvcc - nvidia/label/cuda-11.8.0::cuda-toolkit + - matplotlib=3.7 + - seaborn=0.13.2 + - numpy<1.24 - pip - pip: - -r requirements.txt diff --git a/logic/pipeline.py b/logic/pipeline.py index 948fc11..8592b9f 100644 --- a/logic/pipeline.py +++ b/logic/pipeline.py @@ -175,14 +175,6 @@ def on_epoch_end(self, epoch, logs=None): callbacks.append(PrintEpochCallback()) - - - logger.info(f"p_rate: {cfg.model.p_rate}") - logger.info(f"batch_size: {cfg.training.batch_size}") - logger.info(f"num_epoch: {cfg.training.num_epoch}") - logger.debug(f"validation_split: {cfg.training.validation_split}") - - def hash_model(model): import hashlib import json diff --git a/nautic/config.py b/nautic/config.py new file mode 100644 index 0000000..c0c8717 --- /dev/null +++ b/nautic/config.py @@ -0,0 +1,11 @@ +# Config structure for the flowx +class FlowxConfig: + disable_nautic: bool = False + +flowx_cfg = FlowxConfig() + +# Config structure for the taskx +class TaskxConfig: + disable_nautic: bool = False + +taskx_cfg = TaskxConfig() \ No newline at end of file diff --git a/nautic/context.py b/nautic/context.py index 4b97bf4..8137790 100644 --- a/nautic/context.py +++ b/nautic/context.py @@ -1,12 +1,12 @@ -import os import re import yaml -from typing import Any, Dict, List, Optional, Union +from typing import Any, Dict, List, Optional from pydantic import BaseModel, create_model from colorama import Fore, Style -from .engine import Engine -from .flowx import flowx_cfg -from .taskx import taskx_cfg + +from nautic.engine import Engine +from nautic.config import flowx_cfg +from nautic.config import taskx_cfg # Simple wrapper for scalar alias values class AliasRef: @@ -62,10 +62,7 @@ def __set_model(self, model, target): def __repr__(self): return f"AliasRef(src={self.src} => target={self.target}" - - - -# Main Config wrapper +# This class is responsible to parse the yaml file into a python object class Context: # Type mapping for explicit string declarations __TYPE_MAP = { @@ -81,7 +78,7 @@ class Context: @staticmethod def create(cfg_yaml: str, - disable_nautic:bool=True, + disable_nautic: bool=True, log_level: str = None, **kwargs) -> BaseModel: @@ -102,11 +99,15 @@ def create(cfg_yaml: str, for kw in keywords: if kw in raw_yaml: raise ValueError(f"Configuration YAML must not contain '{kw}' keyword.") + + # inject placeolders for the keywords raw_yaml[kw] = '' + model_builder = Context.__build_model("Context", raw_yaml) + # aliases contains - (cfg, refs) = Context.__get_values(raw_yaml) + (cfg, refs) = Context.__get_values(raw_yaml) model = model_builder(**cfg) # resolve alias references @@ -138,6 +139,7 @@ def __resolve_type(value): elif isinstance(value, str) and value in Context.__TYPE_MAP: return (Optional[Context.__TYPE_MAP[value]], None) else: + # TODO: throw error on incorrect type thrown return (type(value), value) def build_fields(content, path=""): @@ -150,10 +152,12 @@ def build_fields(content, path=""): submodel_fields = build_fields(val, path=full_path) sub_model = create_model( f"{name}_{key}".title().replace("_", ""), **submodel_fields) + fields[key] = (sub_model, ...) elif isinstance(val, list): elem_type = type(val[0]) if val else Any fields[key] = (List[elem_type], val) + else: is_alias_reference = isinstance(val, str) and Context.ALIAS_PATTERN.match(val) if is_alias_reference: @@ -201,10 +205,6 @@ def __get_values(data, path="", refs=None): else: return data, refs - - - - def __getattr__(self, name): return getattr(self._data, name) diff --git a/nautic/engine.py b/nautic/engine.py index 26a3dc6..f2f4b78 100644 --- a/nautic/engine.py +++ b/nautic/engine.py @@ -29,7 +29,6 @@ def __load(self, path): module_name = "_".join(module_parts) # unique ID for importlib namespace_parts = module_parts[:-1] # all folders before the .py file - # Load module spec = importlib.util.spec_from_file_location(module_name, file_path) if not spec or not spec.loader: diff --git a/nautic/flowx.py b/nautic/flowx.py index 2184f9d..725392f 100644 --- a/nautic/flowx.py +++ b/nautic/flowx.py @@ -1,15 +1,12 @@ from functools import wraps -# ✅ Reuse or redefine config structure -class FlowxConfig: - disable_nautic = False - -flowx_cfg = FlowxConfig() +from nautic.context import Context +from nautic.config import flowx_cfg # Cache the flow import (initially None) _cached_flow = None -# cached import prefect +# cached import prefect, so that we load it only when necessary (e.g: prefect not needed in testing) def __get_flow(): global _cached_flow if _cached_flow is None: @@ -17,18 +14,24 @@ def __get_flow(): _cached_flow = flow return _cached_flow +# a context-passing function that optionally becomes a Prefect flow def flowx(_func=None, **f_kwargs): def decorator(fn): - fn._is_flowx = True # 👈 optional marker + # optional marker used for testing/introspection + fn._is_flowx = True + # wraps function so that it returns the context and keeps the function's documentation intact @wraps(fn) - def wrapper(ctx, *args, **kwargs): + def wrapper(ctx: Context, *args, **kwargs): fn(ctx, *args, **kwargs) return ctx + if flowx_cfg.disable_nautic: - return wrapper # Just a plain callable + return wrapper # just call the function else: - __flow = __get_flow() # ✅ Import and cache if needed + __flow = __get_flow() + + # applies the actual @flow wrapper return __flow(**f_kwargs)(wrapper) if _func is None: diff --git a/nautic/logger_prefect.py b/nautic/logger_prefect.py index a9611a7..62d9b54 100644 --- a/nautic/logger_prefect.py +++ b/nautic/logger_prefect.py @@ -66,6 +66,7 @@ def artifact(self, **kwargs): link_text=kwargs.get("text") or kwargs["link"], key=kwargs.get("key"), description=kwargs.get("description"), + client=None ) elif "table" in kwargs: diff --git a/nautic/logger_simple.py b/nautic/logger_simple.py index d8a2c4e..2a2c238 100644 --- a/nautic/logger_simple.py +++ b/nautic/logger_simple.py @@ -1,4 +1,3 @@ -import os from colorama import Fore, Style class Logger: diff --git a/nautic/tasks/dse/bayes_opt.py b/nautic/tasks/dse/bayes_opt.py index 3cbabc2..212b7e7 100644 --- a/nautic/tasks/dse/bayes_opt.py +++ b/nautic/tasks/dse/bayes_opt.py @@ -1,100 +1,100 @@ -import pandas as pd -from bayes_opt import BayesianOptimization -from nautic import taskx - -class BayesOpt: - @taskx - def bayesian_opt(ctx): - bo = ctx.bayes_opt - bo.score = None - log = ctx.log - - if bo.engine is None: - bo.iteration = 0 - bo.summary = [] - - pbounds = { } - tune_vals = { } - tune_space = { } - tune_type = { } - bo.control.params = { } - for key in bo.tunable.model_fields: - opt = getattr(bo.tunable, key) - if isinstance(opt.space.get(), list): - pbounds[key] = (0, len(opt.space.get()) - 0.001) - tune_type[key] = 'categorical' - elif isinstance(opt.space.get(), tuple) and len(opt.space.get()) == 2: - pbounds[key] = opt.space.get() - tune_type[key] = 'continuous' - else: - raise ValueError(f"Unsupported space type for {key}: {type(opt.space.get())}") - - tune_vals[key] = opt.value - tune_space[key] = opt.space - - bo.control.params['values'] = tune_vals - bo.control.params['space'] = tune_space - bo.control.params['type'] = tune_type - - bo.control.metrics = {} - metrics_values = {} - for key in bo.metrics.model_fields: - metrics_values[key] = getattr(bo.metrics, key) - bo.control.metrics['values'] = metrics_values - - score_weights = { } - for key in bo.score_weights.model_fields: - score_weights[key] = getattr(bo.score_weights, key) - bo.control.metrics['score_weights'] = score_weights - - bo.engine = BayesianOptimization( - f = None, - pbounds=pbounds, - random_state=bo.seed.get(), - allow_duplicate_points=True, - verbose=0 - ) - - - # Initial random points - for _ in range(1): - bo.control.suggests = dict(zip(pbounds.keys(), - bo.engine._space.random_sample())) - else: - bo.iteration += 1 - engine = bo.engine - score = 0 - for key in bo.control.metrics['values']: - metric_value = bo.control.metrics['values'][key].get() - base_value = bo.control.metrics['score_weights'][key].base - weight_value = bo.control.metrics['score_weights'][key].weight - - score += float(metric_value / base_value) * float(weight_value) - - bo.score = score - engine.register(params=bo.control.suggests, - target=bo.score) - bo.control.suggests = bo.engine.suggest() - - summary = { 'iteration': bo.iteration, - 'score': "n/a" if bo.score is None else round(bo.score, 4)} - - # set the parameters for other tasks - for key, value in bo.control.suggests.items(): - idx = int(value) - param_type = bo.control.params['type'][key] - if param_type == 'categorical': - metric_val = bo.control.params['space'][key].get()[idx] - else: # continuous - metric_val = value - summary[key] = metric_val - bo.control.params['values'][key].set(metric_val) - - bo.summary.append(summary) - log.artifact(key='bayes-iteration-summary', - table=bo.summary) - - - bo.terminate = not (bo.iteration < bo.num_iter) +# import pandas as pd +# from bayes_opt import BayesianOptimization +# from nautic import taskx + +# class BayesOpt: +# @taskx +# def bayesian_opt(ctx): +# bo = ctx.bayes_opt +# bo.score = None +# log = ctx.log + +# if bo.engine is None: +# bo.iteration = 0 +# bo.summary = [] + +# pbounds = { } +# tune_vals = { } +# tune_space = { } +# tune_type = { } +# bo.control.params = { } +# for key in bo.tunable.model_fields: +# opt = getattr(bo.tunable, key) +# if isinstance(opt.space.get(), list): +# pbounds[key] = (0, len(opt.space.get()) - 0.001) +# tune_type[key] = 'categorical' +# elif isinstance(opt.space.get(), tuple) and len(opt.space.get()) == 2: +# pbounds[key] = opt.space.get() +# tune_type[key] = 'continuous' +# else: +# raise ValueError(f"Unsupported space type for {key}: {type(opt.space.get())}") + +# tune_vals[key] = opt.value +# tune_space[key] = opt.space + +# bo.control.params['values'] = tune_vals +# bo.control.params['space'] = tune_space +# bo.control.params['type'] = tune_type + +# bo.control.metrics = {} +# metrics_values = {} +# for key in bo.metrics.model_fields: +# metrics_values[key] = getattr(bo.metrics, key) +# bo.control.metrics['values'] = metrics_values + +# score_weights = { } +# for key in bo.score_weights.model_fields: +# score_weights[key] = getattr(bo.score_weights, key) +# bo.control.metrics['score_weights'] = score_weights + +# bo.engine = BayesianOptimization( +# f = None, +# pbounds=pbounds, +# random_state=bo.seed.get(), +# allow_duplicate_points=True, +# verbose=0 +# ) + + +# # Initial random points +# for _ in range(1): +# bo.control.suggests = dict(zip(pbounds.keys(), +# bo.engine._space.random_sample())) +# else: +# bo.iteration += 1 +# engine = bo.engine +# score = 0 +# for key in bo.control.metrics['values']: +# metric_value = bo.control.metrics['values'][key].get() +# base_value = bo.control.metrics['score_weights'][key].base +# weight_value = bo.control.metrics['score_weights'][key].weight + +# score += float(metric_value / base_value) * float(weight_value) + +# bo.score = score +# engine.register(params=bo.control.suggests, +# target=bo.score) +# bo.control.suggests = bo.engine.suggest() + +# summary = { 'iteration': bo.iteration, +# 'score': "n/a" if bo.score is None else round(bo.score, 4)} + +# # set the parameters for other tasks +# for key, value in bo.control.suggests.items(): +# idx = int(value) +# param_type = bo.control.params['type'][key] +# if param_type == 'categorical': +# metric_val = bo.control.params['space'][key].get()[idx] +# else: # continuous +# metric_val = value +# summary[key] = metric_val +# bo.control.params['values'][key].set(metric_val) + +# bo.summary.append(summary) +# log.artifact(key='bayes-iteration-summary', +# table=bo.summary) + + +# bo.terminate = not (bo.iteration < bo.num_iter) diff --git a/nautic/taskx.py b/nautic/taskx.py index b7e6878..2ff8738 100644 --- a/nautic/taskx.py +++ b/nautic/taskx.py @@ -1,10 +1,7 @@ from functools import wraps -# ✅ Mutable global config -class TaskxConfig: - disable_nautic = False - -taskx_cfg = TaskxConfig() +from nautic.context import Context +from nautic.config import taskx_cfg # Cache the task import (initially None) _cached_task = None @@ -23,7 +20,7 @@ def taskx(_func=None, **t_kwargs): def decorator(fn): fn._is_taskx = True # 👈 marker for taskx detection @wraps(fn) - def wrapper(ctx, *args, **kwargs): + def wrapper(ctx: Context, *args, **kwargs): fn(ctx, *args, **kwargs) return ctx if taskx_cfg.disable_nautic: diff --git a/run.py b/run.py index 78fb24d..4577a5f 100644 --- a/run.py +++ b/run.py @@ -1,16 +1,20 @@ import os import warnings + os.environ["PREFECT_LOGGING_LEVEL"] = "INFO" os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2" warnings.filterwarnings("ignore", message="pkg_resources is deprecated as an API", category=UserWarning) warnings.filterwarnings("ignore", message=".*Pytorch converter is not enabled.*", category=UserWarning) from nautic import flowx, Context +from pathlib import Path #os.environ["PREFECT_API_URL"] = "http://work.local:4200/api" os.environ["PREFECT_API_URL"] = "http://localhost:4200/api" -ctx = Context.create("config.yaml", +# absolute path for config +BASE_DIR = Path(__file__).resolve().parent +ctx = Context.create(str(BASE_DIR / "config/config_lenet.yaml"), log_level="INFO", disable_nautic=False) @@ -22,33 +26,51 @@ def perform_optimization(ctx): log = ctx.log engine.keras.initialize_experiment() - engine.keras.get_dataset() - - while True: - engine.dse.bayesian_opt() - if ctx.bayes_opt.terminate: - break - engine.keras.get_model() - engine.keras.trust.build_bayesian_model() - engine.keras.train_model() - engine.keras.eval() - #print("Accuracy:", ctx.eval.accuracy) - #engine.keras.trust.eval() - - - - -ctx.eval.ece = 10 -ctx.eval.ape = 20 - -ctx.eval.flop = 1000 - - -perform_optimization(ctx) - - - - - - + engine.strategy.initialise_strategies() + + while not ctx.strategy.terminate_strategies: + log.info(f"We are using the strategy: {ctx.strategy.curr_strategy}") + + engine.keras.get_dataset() + engine.dse.initialise_bayesian_opt() + + while True: + # think about triggers + engine.dse.bayesian_opt() + if ctx.bayes_opt.terminate: + engine.strategy.save_results() + engine.strategy.next_strategy() + break + + engine.keras.get_model() + engine.keras.trust.build_bayesian_model() + engine.keras.train_model() + engine.keras.eval() + + best_summary = max(ctx.bayes_opt.summary, key=lambda x: x["metrics"]["score"]) + + params = best_summary["hyperparameters"] + log.info( + f"""Final parameters: + droupout rate: {params["dropout_rate"]} + p rate: {params["p_rate"]} + scale factor: {params["scale_factor"]} + num bayes later: {params["num_bayes_layer"]}""") + + metrics = best_summary["metrics"] + log.info( + f"""With performance metrics: + ece: {metrics["ece"]} + ape: {metrics["ape"]} + accuracy: {metrics["accuracy"]} + flops: {metrics["flops"]}""") + + engine.strategy.create_result_table() + engine.strategy.create_4d_pareto_figures() + engine.strategy.create_correlation_matrix() + + log.info("Finished with all the strategies for Bayesian Optimisation") + +if __name__ == "__main__": + perform_optimization(ctx) diff --git a/tasks/dse/bayes_opt.py b/tasks/dse/bayes_opt.py index c75d68c..002fbda 100644 --- a/tasks/dse/bayes_opt.py +++ b/tasks/dse/bayes_opt.py @@ -1,99 +1,122 @@ -import pandas as pd from bayes_opt import BayesianOptimization from nautic import taskx +from tasks.strategy.strategy import Strategy class BayesOpt: + + @taskx + def initialise_bayesian_opt(ctx): + bo = ctx.bayes_opt + bo.score = None + + bo.iteration = 0 + bo.summary = [] + + pbounds = { } + tune_vals = { } + tune_space = { } + bo.control.params = { } + + for key in bo.tunable.model_fields: + opt = getattr(bo.tunable, key) + if isinstance(opt.space, list): + pbounds[key] = (0, len(opt.space) - 0.001) + else: + raise ValueError(f"Unsupported space type for {key}: {type(opt.space)}") + + tune_vals[key] = opt.value + tune_space[key] = opt.space + + bo.control.params['values'] = tune_vals + bo.control.params['space'] = tune_space + + bo.engine = BayesianOptimization( + f = None, + pbounds=pbounds, + random_state=bo.seed.get(), + allow_duplicate_points=True + ) + + bo.curr_strategy = Strategy.get_curr_strategy_object(ctx) + @taskx def bayesian_opt(ctx): bo = ctx.bayes_opt + engine = bo.engine + bo.score = None log = ctx.log - if bo.engine is None: - bo.iteration = 0 - bo.summary = [] - - pbounds = { } - tune_vals = { } - tune_space = { } - bo.control.params = { } - for key in bo.tunable.model_fields: - opt = getattr(bo.tunable, key) - if isinstance(opt.space, list): - pbounds[key] = (0, len(opt.space) - 0.001) - else: - raise ValueError(f"Unsupported space type for {key}: {type(opt.space)}") - - tune_vals[key] = opt.value - tune_space[key] = opt.space - bo.control.params['values'] = tune_vals - bo.control.params['space'] = tune_space - - bo.control.metrics = {} - metrics_values = {} - for key in bo.metrics.model_fields: - metrics_values[key] = getattr(bo.metrics, key) - bo.control.metrics['values'] = metrics_values - - score_weights = { } - for key in bo.score_weights.model_fields: - score_weights[key] = getattr(bo.score_weights, key) - bo.control.metrics['score_weights'] = score_weights - - bo.engine = BayesianOptimization( - f = None, - pbounds=pbounds, - random_state=bo.seed.get(), - allow_duplicate_points=True - ) + if bo.iteration == 0: + # Initial random points, use rng to make deterministic + rng = bo.engine._random_state - # Initial random points for _ in range(1): - bo.control.suggests = dict(zip(pbounds.keys(), - bo.engine._space.random_sample())) + bo.control.suggests = dict( + zip( + bo.tunable.model_fields, + rng.uniform( + bo.engine._space.bounds[:, 0], + bo.engine._space.bounds[:, 1] + ) + ) + ) + else: - bo.iteration += 1 - engine = bo.engine - - score = 0 - for key in bo.control.metrics['values']: - metric_value = bo.control.metrics['values'][key].get() - base_value = bo.control.metrics['score_weights'][key].base - weight_value = bo.control.metrics['score_weights'][key].weight - - score += float(metric_value / base_value) * float(weight_value) - - bo.score = score - engine.register(params=bo.control.suggests, - target=bo.score) + BayesOpt.record_iteration(bo, engine, log) bo.control.suggests = bo.engine.suggest() - summary = { 'iteration': bo.iteration, - 'score': "n/a" if bo.score is None else round(bo.score, 4)} + bo.iteration += 1 + bo.terminate = not (bo.iteration <= bo.num_iter) - # set the parameters for other tasks - for key, value in bo.control.suggests.items(): - idx = int(value) - metric_val = bo.control.params['space'][key][idx] - summary[key] = metric_val - bo.control.params['values'][key].set(metric_val) + metric_values = BayesOpt.suggest_to_values(bo) - bo.summary.append(summary) - log.artifact(key='bayes-iteration-summary', - table=bo.summary) + # update the refs to reflect the new values + for key, value in metric_values.items(): + bo.control.params['values'][key].set(value) + # Records current iteration suggest into a summary + @staticmethod + def record_iteration(bo, engine, log): + score = 0 + summary = { + 'iteration': bo.iteration + } - bo.terminate = not (bo.iteration < bo.num_iter) + metric_values = {} + for metric in bo.metrics.model_fields: + metric_value = getattr(bo.metrics, metric).get() + curr_metric_params = getattr(bo.curr_strategy, metric) + score += float(metric_value / curr_metric_params.base) * float(curr_metric_params.weight) + metric_values[metric] = round(metric_value, 4) + metric_values["score"] = score + summary["metrics"] = metric_values + + bo.score = score + summary["hyperparameters"] = BayesOpt.suggest_to_values(bo) + + bo.summary.append(summary) + log.artifact(key=f'bayes-iteration-summary-strategy-{bo.curr_strategy.name}'.lower(), + table=bo.summary) - # cfg.model.dropout_rate = cfg.search_space.dropout_rate_list[int(tune_params["dropout_rate"])] - # cfg.model.p_rate = cfg.search_space.p_rate_list[int(tune_params["p_rate"])] - # cfg.model.num_bayes_layer = cfg.search_space.num_bayes_layer_list[int(tune_params["num_bayes_layer"])] - # cfg.model.scale_factor = cfg.search_space.scale_factor_list[int(tune_params["scale_factor"])] + engine.register(params=bo.control.suggests, + target=bo.score) + # This method converts the suggest into a dictionary of hyper-parameters + @staticmethod + def suggest_to_values(bo): + hyperparams = {} + for key, value in bo.control.suggests.items(): + idx = int(value) + hyperparams[key] = bo.control.params['space'][key][idx] + + return hyperparams + + # # Create a table artifact # create_table_artifact( # key=f"bayes-iteration-{cfg.bayes_opt.iteration}", @@ -109,5 +132,4 @@ def bayesian_opt(ctx): # ], # description="Bayesian Optimization Step Summary" # ) - # return cfg - + # return cfg \ No newline at end of file diff --git a/tasks/keras/dataset.py b/tasks/keras/dataset.py index 111c757..b7db9f9 100644 --- a/tasks/keras/dataset.py +++ b/tasks/keras/dataset.py @@ -2,6 +2,7 @@ import numpy as np from tensorflow.keras.datasets import mnist from tensorflow.keras.utils import to_categorical +from logic.datasets import CIFAR10Data from nautic import taskx class KerasDataset: @@ -34,7 +35,7 @@ def get_dataset(ctx): x_test /= 256.0 y_train = to_categorical(y_train, num_classes) y_test = to_categorical(y_test, num_classes) - elif ctx.dataset == "cifar10": + elif name == "cifar10": cifar10_data = CIFAR10Data() x_train, y_train, x_test, y_test = cifar10_data.get_data(subtract_mean=True) @@ -48,7 +49,6 @@ def get_dataset(ctx): x_train = x_train[mask] y_train = y_train[mask] - print('num train:%d num val:%d' % (num_train, num_val)) data = (x_train, y_train, x_val, y_val, x_test, y_test) else: raise NotImplementedError("Dataset not supoorted") diff --git a/tasks/keras/eval.py b/tasks/keras/eval.py index 6dbfc88..a7533ce 100644 --- a/tasks/keras/eval.py +++ b/tasks/keras/eval.py @@ -2,6 +2,7 @@ import numpy as np import tensorflow as tf from tensorflow.keras.models import load_model +import tensorflow_probability as tfp from tensorflow.python.framework.convert_to_constants import ( convert_variables_to_constants_v2_as_graph, ) @@ -12,16 +13,38 @@ class KerasEval: @taskx def eval(ctx): - ctx.eval.accuracy = KerasEval.evaluate_accuracy(ctx) - #flops1 = KerasEval.eval_flop(ctx, True) - flops2 = KerasEval.eval_flop(ctx) - #print(f"FLOPS with batch size A: {flops1}, with batch size B: {flops2}") - - def evaluate_accuracy(ctx): - model = load_model(ctx.experiment.ckpt_file) y_prob = model.predict(ctx.dataset.data["x_test"]) + ctx.eval.accuracy = KerasEval.evaluate_accuracy(ctx, y_prob) + ctx.eval.ece = KerasEval.evaluate_ece(ctx, y_prob) + ctx.eval.ape = KerasEval.evaluate_ape(ctx, model) + ctx.eval.flops = KerasEval.evaluate_flops(ctx) + + def evaluate_ece(ctx, y_prob) -> float: + y_logits = np.log(y_prob/(1-y_prob + 1e-15)) + + # we use CPU device as TF_DETERMINISTIC_OPS is not implemented in tf.math.bincount(x) + with tf.device('/CPU:0'): + ece_keras = tfp.stats.expected_calibration_error(num_bins=ctx.eval.num_bins, + logits=y_logits, labels_true=np.argmax(ctx.dataset.data["y_test"],axis=1), labels_predicted=np.argmax(y_prob,axis=1)) + + return float(ece_keras) + + def evaluate_ape(ctx, model) -> float: + def entropy(output): + batch_size = output.shape[0] + entropy = -np.sum(np.log(output+1e-8)*output)/batch_size + return entropy + + x = ctx.dataset.data["x_train"] + + # TODO: ask about mean - should be hard-coded? + x_noise = np.random.normal(ctx.dataset.mean, ctx.dataset.std, size=x.shape).astype(x.dtype) + + return entropy(model.predict(np.ascontiguousarray(x_noise))) + + def evaluate_accuracy(ctx, y_prob): accuracy = float(accuracy_score( np.argmax(ctx.dataset.data["y_test"], axis=1), np.argmax(y_prob, axis=1) @@ -29,8 +52,7 @@ def evaluate_accuracy(ctx): return accuracy - - def eval_flop(ctx): + def evaluate_flops(ctx): """ Calculate FLOPS for tf.keras.Model or tf.keras.Sequential . Ignore operations used in only training mode such as Initialization. @@ -48,6 +70,7 @@ def eval_flop(ctx): inputs = [ tf.TensorSpec([batch_size] + inp.shape[1:], inp.dtype) for inp in model.inputs ] + real_model = tf.function(model).get_concrete_function(inputs) frozen_func, _ = convert_variables_to_constants_v2_as_graph(real_model) diff --git a/tasks/keras/experiment.py b/tasks/keras/experiment.py index 6bbdd82..419b650 100644 --- a/tasks/keras/experiment.py +++ b/tasks/keras/experiment.py @@ -1,5 +1,13 @@ import os +import shutil +from typing import List + +os.environ['TF_DETERMINISTIC_OPS'] = '1' # Force TF deterministic ops +os.environ['TF_CUDNN_DETERMINISTIC'] = '1' # CuDNN determinism for certain layers +# os.environ['TF_ENABLE_AUTO_MIXED_PRECISION'] = '0' # Disables AMP which can introduce nondeterminism + import tensorflow as tf + import random import numpy as np from nautic import taskx @@ -8,7 +16,7 @@ class KerasExperiment: @taskx def initialize_experiment(ctx): - def configure_gpus(gpu_indices): + def configure_gpus(use_cpu: bool, gpu_indices : List[int] | None): """ Configures TensorFlow to use only the specified GPU indices. @@ -16,10 +24,21 @@ def configure_gpus(gpu_indices): gpu_indices (list of int): List of GPU indices to make visible to TensorFlow. Use an empty list to disable GPU usage. """ - gpus = tf.config.list_physical_devices('GPU') log = ctx.log - log.debug("OH NO! I AM A TEST EXPERIMENT") + log.debug("I AM A TEST EXPERIMENT - In DEBUG Mode") + + if use_cpu: + try: + # Make no GPUs visible (force CPU mode) + tf.config.set_visible_devices([], 'GPU') + log.info("🚫 GPU usage disabled — running on CPU only.") + except RuntimeError as e: + log.info("❌ RuntimeError while disabling GPU:", e) + finally: + return + + gpus = tf.config.list_physical_devices('GPU') if not gpus: log.warning("⚠️ No GPUs found.") @@ -27,7 +46,7 @@ def configure_gpus(gpu_indices): else: log.debug(f"Found {len(gpus)} GPU(s): {[gpu.name for gpu in gpus]}") - if gpu_indices: + if gpu_indices and len(gpu_indices) > 0: try: selected_gpus = [gpus[i] for i in gpu_indices] tf.config.set_visible_devices(selected_gpus, 'GPU') @@ -39,23 +58,27 @@ def configure_gpus(gpu_indices): except RuntimeError as e: print("❌ RuntimeError during GPU configuration:", e) else: - try: - # Make no GPUs visible (force CPU mode) - tf.config.set_visible_devices([], 'GPU') - print("🚫 GPU usage disabled — running on CPU only.") - except RuntimeError as e: - print("❌ RuntimeError while disabling GPU:", e) + log.warning("⚠️ CPU used by default as no CPU or GPU indices provided are empty") + return configure_gpus(True, []) + + save_dir = ctx.experiment.save_dir - os.makedirs(ctx.experiment.save_dir, exist_ok=True) - ctx.experiment.save_dir = os.path.abspath(ctx.experiment.save_dir) - ctx.experiment.ckpt_file = os.path.join(ctx.experiment.save_dir, ctx.experiment.ckpt_file) + if os.path.exists(save_dir): + shutil.rmtree(save_dir) + + os.makedirs(save_dir) + ctx.experiment.save_dir = os.path.abspath(save_dir) + ctx.experiment.ckpt_file = os.path.join(save_dir, ctx.experiment.ckpt_file) seed = ctx.experiment.seed + os.environ['PYTHONHASHSEED'] = str(seed) + random.seed(seed) np.random.seed(seed) + tf.keras.utils.set_random_seed(seed) tf.random.set_seed(seed) - configure_gpus(ctx.experiment.gpus) - + use_cpu = getattr(ctx.experiment, "cpu", False) + configure_gpus(use_cpu, getattr(ctx.experiment, "gpus", [])) diff --git a/tasks/keras/model_factory.py b/tasks/keras/model_factory.py index 714726b..20ed802 100644 --- a/tasks/keras/model_factory.py +++ b/tasks/keras/model_factory.py @@ -8,6 +8,7 @@ from tasks.keras.models.lenet import LeNet from nautic import taskx +from tasks.keras.models.resnet import ResNet18 class KerasModels: @taskx @@ -25,7 +26,8 @@ def get_model(ctx): factory_quant = { } factory_nquant = { - "lenet": LeNet + "lenet": LeNet, + "resnet": ResNet18 } if ctx.model.is_quant: @@ -34,17 +36,14 @@ def get_model(ctx): model_builder = factory_nquant.get(ctx.model.name, None) if not model_builder: - raise NotImplementedError(f"Model '{ctx.model.name}' (quant: {ctx.mode.is_quant}) not supported") - + raise NotImplementedError(f"Model '{ctx.model.name}' (quant: {ctx.model.is_quant}) not supported") ctx.model.logic = model_builder(ctx) ctx.model.original = clone_model(ctx.model.logic) - - KerasModels.prune(ctx) - + @staticmethod def prune(ctx): if ctx.model.p_rate == 0.0: return diff --git a/tasks/keras/models/lenet.py b/tasks/keras/models/lenet.py index 4b8b146..e676ba9 100644 --- a/tasks/keras/models/lenet.py +++ b/tasks/keras/models/lenet.py @@ -1,10 +1,10 @@ from tensorflow.keras.models import Sequential from tensorflow.keras import activations -from keras.layers import Dense, Activation, \ +from tensorflow.keras.layers import Dense, Activation, \ Flatten, \ Conv2D, MaxPool2D -from keras.optimizers import SGD +from tensorflow.keras.optimizers import SGD from tasks.keras.trust.converter.uncertainty_factory import get_uncertainty_layer diff --git a/tasks/keras/models/resnet.py b/tasks/keras/models/resnet.py new file mode 100644 index 0000000..017ee5f --- /dev/null +++ b/tasks/keras/models/resnet.py @@ -0,0 +1,116 @@ +from keras.layers import Input, Dense, Activation, BatchNormalization, Flatten, Conv2D, AveragePooling2D +from keras.regularizers import l2 +from keras import layers + +from tensorflow.keras.models import Model +from tensorflow.keras.optimizers import SGD + +from tasks.keras.trust.converter.dropout.mc_model import BayesianDropout + +############################## ResNet #################################### +# Get from https://github.com/jerett/Keras-CIFAR10 +def conv2d_bn(x, filters, kernel_size, weight_decay=.0, strides=(1, 1)): + layer = Conv2D(filters=filters, + kernel_size=kernel_size, + strides=strides, + padding='same', + use_bias=False, + kernel_regularizer=l2(weight_decay) + )(x) + layer = BatchNormalization()(layer) + return layer + +def Insert_Bayesian_Layer(cfg, x): + if cfg.model.dropout_type == "mc": x = BayesianDropout(cfg.model.dropout_rate)(x) + else: raise NotImplementedError("dropout type is not supportred") + return x + +def conv2d_bn_relu(x, filters, kernel_size, weight_decay=.0, strides=(1, 1)): + layer = conv2d_bn(x, filters, kernel_size, weight_decay, strides) + layer = Activation('relu')(layer) + return layer + +def ResidualBlock(x, filters, kernel_size, weight_decay, downsample=True): + if downsample: + # residual_x = conv2d_bn_relu(x, filters, kernel_size=1, strides=2) + residual_x = conv2d_bn(x, filters, kernel_size=1, strides=2) + stride = 2 + else: + residual_x = x + stride = 1 + residual = conv2d_bn_relu(x, + filters=filters, + kernel_size=kernel_size, + weight_decay=weight_decay, + strides=stride, + ) + residual = conv2d_bn(residual, + filters=filters, + kernel_size=kernel_size, + weight_decay=weight_decay, + strides=1, + ) + out = layers.add([residual_x, residual]) + out = Activation('relu')(out) + return out + +def ResNet18(cfg, input_shape=(32, 32, 3), classes=10, num_bayes_loc=8, weight_decay=1e-4, base_filters=64): + new_base_filters = max(1, int(base_filters * cfg.model.scale_factor)) + input = Input(shape=input_shape) + x = input + num_nonbayes_layer = num_bayes_loc - cfg.model.num_bayes_layer - 1 + # x = conv2d_bn_relu(x, filters=64, kernel_size=(7, 7), weight_decay=weight_decay, strides=(2, 2)) + # x = MaxPool2D(pool_size=(3, 3), strides=(2, 2), padding='same')(x) + x = conv2d_bn_relu(x, filters=new_base_filters, kernel_size=(3, 3), weight_decay=weight_decay, strides=(1, 1)) + + # # conv 2 + x = ResidualBlock(x, filters=new_base_filters, kernel_size=(3, 3), weight_decay=weight_decay, downsample=False) + # Insert Bayeian Layer, can be mc dropout or mask ensumble layer + if (num_nonbayes_layer < 0): x = Insert_Bayesian_Layer(cfg, x) + num_nonbayes_layer -= 1 + + x = ResidualBlock(x, filters=new_base_filters, kernel_size=(3, 3), weight_decay=weight_decay, downsample=False) + # Insert Bayeian Layer, can be mc dropout or mask ensumble layer + if (num_nonbayes_layer < 0): x = Insert_Bayesian_Layer(cfg, x) + num_nonbayes_layer -= 1 + + # # conv 3 + x = ResidualBlock(x, filters=2*new_base_filters, kernel_size=(3, 3), weight_decay=weight_decay, downsample=True) + # Insert Bayeian Layer, can be mc dropout or mask ensumble layer + if (num_nonbayes_layer < 0): x = Insert_Bayesian_Layer(cfg, x) + num_nonbayes_layer -= 1 + + x = ResidualBlock(x, filters=2*new_base_filters, kernel_size=(3, 3), weight_decay=weight_decay, downsample=False) + # Insert Bayeian Layer, can be mc dropout or mask ensumble layer + if (num_nonbayes_layer < 0): x = Insert_Bayesian_Layer(cfg, x) + num_nonbayes_layer -= 1 + + # # conv 4 + x = ResidualBlock(x, filters=4*new_base_filters, kernel_size=(3, 3), weight_decay=weight_decay, downsample=True) + # Insert Bayeian Layer, can be mc dropout or mask ensumble layer + if (num_nonbayes_layer < 0): x = Insert_Bayesian_Layer(cfg, x) + num_nonbayes_layer -= 1 + + x = ResidualBlock(x, filters=4*new_base_filters, kernel_size=(3, 3), weight_decay=weight_decay, downsample=False) + # Insert Bayeian Layer, can be mc dropout or mask ensumble layer + if (num_nonbayes_layer < 0): x = Insert_Bayesian_Layer(cfg, x) + num_nonbayes_layer -= 1 + + # # conv 5 + x = ResidualBlock(x, filters=8*new_base_filters, kernel_size=(3, 3), weight_decay=weight_decay, downsample=True) + # Insert Bayeian Layer, can be mc dropout or mask ensumble layer + if (num_nonbayes_layer < 0): x = Insert_Bayesian_Layer(cfg, x) + num_nonbayes_layer -= 1 + + x = ResidualBlock(x, filters=8*new_base_filters, kernel_size=(3, 3), weight_decay=weight_decay, downsample=False) + # Insert Bayeian Layer, can be mc dropout or mask ensumble layer + if (num_nonbayes_layer < 0): x = Insert_Bayesian_Layer(cfg, x) + num_nonbayes_layer -= 1 + + x = AveragePooling2D(pool_size=(4, 4), padding='valid')(x) + x = Flatten()(x) + x = Dense(classes, activation='softmax')(x) + model = Model(input, x, name='ResNet18') + model.compile(optimizer=SGD(lr=cfg.train.learning_rate, momentum=0.9, nesterov=False), loss=['categorical_crossentropy'], metrics=['accuracy']) + # model.compile(optimizer=Adam(lr=args.lr, amsgrad=True), loss=['categorical_crossentropy'], metrics=['accuracy']) + return model \ No newline at end of file diff --git a/tasks/keras/svhn/utils.py b/tasks/keras/svhn/utils.py new file mode 100644 index 0000000..c099407 --- /dev/null +++ b/tasks/keras/svhn/utils.py @@ -0,0 +1,40 @@ +import math +from tensorflow.keras.callbacks import Callback +from tensorflow.keras import backend as K +from tensorflow.keras.optimizers.schedules import LearningRateSchedule + + +class CosineAnnealingScheduler(Callback): + """Cosine annealing scheduler. + """ + + def __init__(self, T_max, eta_max, eta_min=0, verbose=0): + super(CosineAnnealingScheduler, self).__init__() + self.T_max = T_max + self.eta_max = eta_max + self.eta_min = eta_min + self.verbose = verbose + + def on_epoch_begin(self, epoch, logs=None): + if not hasattr(self.model.optimizer, 'lr'): + raise ValueError('Optimizer must have a "lr" attribute.') + lr = self.eta_min + (self.eta_max - self.eta_min) * (1 + math.cos(math.pi * epoch / self.T_max)) / 2 + K.set_value(self.model.optimizer.lr, lr) + if self.verbose > 0: + print('\nEpoch %05d: CosineAnnealingScheduler setting learning ' + 'rate to %s.' % (epoch + 1, lr)) + + def on_epoch_end(self, epoch, logs=None): + logs = logs or {} + logs['lr'] = K.get_value(self.model.optimizer.lr) + +class Custom_CosineAnnealingScheduler(LearningRateSchedule): + def __init__(self, T_max, eta_max, eta_min=0): + super(Custom_CosineAnnealingScheduler, self).__init__() + self.T_max = T_max + self.eta_max = eta_max + self.eta_min = eta_min + + def __call__(self, epoch): + lr = self.eta_min + (self.eta_max - self.eta_min) * (1 + math.cos(math.pi * epoch / self.T_max)) / 2 + return lr diff --git a/tasks/keras/train.py b/tasks/keras/train.py index 2d64167..bba3c32 100644 --- a/tasks/keras/train.py +++ b/tasks/keras/train.py @@ -3,6 +3,10 @@ from tensorflow_model_optimization.python.core.sparsity.keras import pruning_callbacks from nautic import taskx +from tensorflow.keras.preprocessing.image import ImageDataGenerator + +from tasks.keras.svhn.utils import CosineAnnealingScheduler + class KerasTrain: @taskx def train_model(ctx): @@ -18,6 +22,12 @@ def train_model(ctx): model = ctx.model.logic dataset = ctx.dataset.data + class ProgressCallback(Callback): + def on_epoch_end(self, epoch, logs=None): + progress = (epoch / nepoch) * 100 + log.artifact(progress_id=pg_id, + progress=progress) + if ctx.model.name == "lenet": chkp = ModelCheckpoint( ctx.experiment.ckpt_file, @@ -35,20 +45,12 @@ def train_model(ctx): else: callbacks = [chkp] - pg_id = log.artifact( progress=0.0, - description="Performing training", + description="Performing training for LeNet", ) nepoch = ctx.train.num_epoch - 1 - class ProgressCallback(Callback): - def on_epoch_end(self, epoch, logs=None): - progress = (epoch / nepoch) * 100 - log.artifact(progress_id=pg_id, - progress=progress) - - callbacks.append(ProgressCallback()) train_stat = model.fit( @@ -62,8 +64,54 @@ def on_epoch_end(self, epoch, logs=None): log.artifact(table=train_stat.history, key=f"train-results-{ctx.train.id.get()}", - description="Training results",) + description="Training results") + + elif ctx.model.name == "resnet": + datagen = ImageDataGenerator( + featurewise_center=False, # set input mean to 0 over the dataset + samplewise_center=False, # set each sample mean to 0 + featurewise_std_normalization=False, # divide inputs by std of the dataset + samplewise_std_normalization=False, # divide each input by its std + zca_whitening=False, # apply ZCA whitening + # rotation_range=15, # randomly rotate images in the range (degrees, 0 to 180) + width_shift_range=4, # randomly shift images horizontally (fraction of total width) + height_shift_range=4, # randomly shift images vertically (fraction of total height) + horizontal_flip=True, # randomly flip images + vertical_flip=False, # randomly flip images + ) + + train_gen = datagen.flow(dataset['x_train'], dataset['y_train'], batch_size=ctx.train.batch_size) + reduce_lr = CosineAnnealingScheduler(T_max=ctx.train.num_epoch, eta_max=ctx.train.learning_rate, eta_min=1e-4) + + chkp = ModelCheckpoint( + ctx.experiment.ckpt_file, + monitor="val_loss", + verbose=1, + save_best_only=True, + save_weights_only=False, + mode="auto", + save_freq="epoch", + ) + if ctx.model.p_rate != 0.0: + callbacks = [reduce_lr, chkp, pruning_callbacks.UpdatePruningStep() ] + else: + callbacks = [reduce_lr, chkp] + + pg_id = log.artifact( + progress=0.0, + description="Performing training for ResNet", + ) + nepoch = ctx.train.num_epoch - 1 + callbacks.append(ProgressCallback()) + train_stat = model.fit_generator(generator=train_gen, + epochs=ctx.train.num_epoch, + callbacks=callbacks, + validation_data=(dataset['x_val'], dataset['y_val']), + ) + log.artifact(table=train_stat.history, + key=f"train-results-{ctx.train.id.get()}", + description="Training results") \ No newline at end of file diff --git a/tasks/keras/trust/converter/dropout/mc_model.py b/tasks/keras/trust/converter/dropout/mc_model.py index a40bf1c..689d368 100644 --- a/tasks/keras/trust/converter/dropout/mc_model.py +++ b/tasks/keras/trust/converter/dropout/mc_model.py @@ -2,6 +2,37 @@ from tasks.keras.trust.converter.dropout.inference_layer import InferenceDropoutLayer import tensorflow as tf +from keras import layers + +class BayesianDropout(tf.keras.layers.Layer): + r""" + Applies Dropout to the input. + """ + def __init__(self, drop_rate=0.5, seed=None, **kwargs): + if drop_rate < 0 or drop_rate > 1: + raise ValueError("dropout probability has to be between 0 and 1, " + "but got {}".format(drop_rate)) + super(BayesianDropout, self).__init__(**kwargs) + self.drop_rate = drop_rate + # if seed set to None, random output will be given + self.seed = seed + + def get_config(self): + # default seed is 0 + seed = 0 if self.seed is None else self.seed + config = { + 'drop_rate': self.drop_rate, + 'seed': seed + } + base_config = super(BayesianDropout, self).get_config() + return dict(list(base_config.items()) + list(config.items())) + + @classmethod + def from_config(cls, config): + return cls(**config) + + def call(self, input): + return layers.Dropout(rate=self.drop_rate, seed=self.seed)(input, training=True) class MonteCarloDropoutModel(HlsLayer): r""" diff --git a/tasks/strategy/results.py b/tasks/strategy/results.py new file mode 100644 index 0000000..1186264 --- /dev/null +++ b/tasks/strategy/results.py @@ -0,0 +1,443 @@ +from nautic import taskx + +from heapq import nlargest +import pandas as pd +import numpy as np + +import seaborn as sns +import matplotlib.pyplot as plt +from scipy.spatial import ConvexHull + +import os + +class Results: + @taskx + def create_result_table(ctx): + strat = ctx.strategy + log = ctx.log + + table = Results.get_top_n_results(strat) + + log.artifact(key='all-top-results-table', table=table) + + # save table as markdown in figures subfolder of save dir + df = pd.DataFrame(table) + + columns = ["Opt-Mode"] + [c for c in df.columns if c != "Opt-Mode"] + df = df[columns] + md_df = df.to_markdown(index=False) + + log.info("\nTop strategy results:\n%s", md_df) + + md_path = Results.get_figures_path("all_top_results_table.md", strat.save_dir.get()) + with open(md_path, "w") as f: + f.write("# Top Strategy Results\n\n") + f.write(md_df) + + @taskx + def create_4d_pareto_figures(ctx): + strat = ctx.strategy + log = ctx.log + + results = strat.results + optimal_table = Results.get_top_n_results(strat) + + # collect all the points into a list + all_points = [] + for _, results_list in results.items(): + for r in results_list: + metrics = r["metrics"].copy() + if "score" in metrics: + del metrics["score"] + + metric_names_local = list(metrics.keys()) + + if len(metric_names_local) != 4: + log.error("❌ We are trying to display a 4D-pareto front without 4 metrics. Not supported") + return + + all_points.append([metrics[m] for m in metric_names_local]) + + points = np.array(all_points) + maximize_local = [True]*4 + + # get only the points on the pareto frontier + pareto_mask = Results.is_pareto_efficient(points, maximize_local) + pareto_points = points[pareto_mask] + + fig = plt.figure(figsize=(10, 8)) + ax = fig.add_subplot(111, projection='3d') + + # Axes: first 3 metrics, 4th metric is color bar + x, y, z = points[:, 0], points[:, 1], points[:, 2] + color_metric = points[:, 3] + + # Plot all the points of the bayes opt + sc_normal = ax.scatter(x, y, z, c=color_metric, cmap='coolwarm', s=50, label='Configurations') + + # Plot all the points from the convex hull as the pareto frontier + if len(pareto_points) >= 4: + hull = ConvexHull(pareto_points[:, :3]) + # Hull lines + for simplex in hull.simplices: + ax.plot(pareto_points[simplex, 0], + pareto_points[simplex, 1], + pareto_points[simplex, 2], + "k-", linewidth=1.5) + + # Pareto surface points (black outline) + hull_indices = np.unique(hull.simplices.flatten()) + pareto_surface_points = pareto_points[hull_indices] + ax.scatter(pareto_surface_points[:, 0], + pareto_surface_points[:, 1], + pareto_surface_points[:, 2], + c=color_metric[pareto_mask][hull_indices], + cmap='coolwarm', + s=100, + edgecolors='k', + linewidths=2, + label='Pareto front (surface)') + + # Highlight the optimal points found from the different strategies by labelling them and adding a green cross + if len(optimal_table) > 0: + opt_points = [] + opt_labels = [] + for row in optimal_table: + metrics = {k: v for k, v in row.items() if k not in ['Opt-Mode', 'hyperparameters']} + opt_points.append([metrics[m] for m in metric_names_local]) + opt_labels.append(row['Opt-Mode']) + + # Convert to numpy array and deduplicate + opt_points = np.array(opt_points) + opt_labels = np.array(opt_labels) + unique_rows, unique_indices = np.unique(opt_points, axis=0, return_index=True) + opt_points = unique_rows + opt_labels = opt_labels[unique_indices] + + ax.scatter(opt_points[:, 0], opt_points[:, 1], opt_points[:, 2], + marker='x', color='green', s=120, linewidths=3, zorder=10, label='Optimal solutions') + + for i in range(len(opt_points)): + ax.text(opt_points[i, 0], opt_points[i, 1], opt_points[i, 2], + opt_labels[i], color='green', fontsize=8) + + # Colorbar and labels + plt.colorbar(sc_normal, ax=ax, label=metric_names_local[3]) + ax.set_xlabel(metric_names_local[0]) + ax.set_ylabel(metric_names_local[1]) + ax.set_zlabel(metric_names_local[2]) + ax.set_title("4D Pareto Front of Bayes Opt results") + + ax.legend() + + plt_path = Results.get_figures_path("4d-pareto-front-plot.png", strat.save_dir.get()) + plt.savefig(plt_path, dpi=300, bbox_inches='tight') + + # TODO: actually fix the artifact to upload a real image + log.artifact(key='4d-pareto-front-plot', image=plt_path) + + @taskx + def create_correlation_matrix(ctx): + strat = ctx.strategy + log = ctx.log + + points = [] + for _, results_list in strat.results.items(): + for result in results_list: + row = {} + + row.update(result.get("hyperparameters", {})) + row.update(result.get("metrics", {})) + + if "score" in row: + del row["score"] + + points.append(row) + + if len(points) < 2: + log.warning("⚠️ There are not enough points (< 2) to do Spearman correlation on these results.") + + df = pd.DataFrame(points) + corr = df.corr(method="pearson") + + plt.figure(figsize=(8, 6)) + + sns.heatmap( + corr, + annot=True, + cmap="coolwarm", + square=True, + fmt=".2f", + cbar_kws={"shrink": 0.8}, + ) + + plt.title("Correlation Matrix between metrics and hyperparameters across all configurations") + plt.tight_layout() + + plt_path = Results.get_figures_path("correlation_heatmap.png", strat.save_dir.get()) + plt.savefig(plt_path, dpi=300, bbox_inches='tight') + + # TODO: actually fix the artifact to upload a real image + log.artifact(key='4d-pareto-front-plot', image=plt_path) + + @staticmethod + def get_figures_path(path, save_dir): + figures_dir = os.path.join(save_dir, "figures") + os.makedirs(figures_dir, exist_ok=True) + + return os.path.join(figures_dir, path) + + @staticmethod + def get_top_n_results(strat): + results = strat.results + table = [] + + for strat_index, strat_results in results.items(): + strat_obj = strat.strategies[strat_index] + + # just log the top n results according to the top metric in the table for particular strategy + for i, top_result in enumerate(nlargest(strat_obj.top_n, strat_results, key=lambda r: r["metrics"][strat_obj.top_metric])): + row_name = strat_obj.name + + # add top name only if we keep track of multiple top results + if strat_obj.top_n > 1: + row_name = f"{row_name}-Top{i+1}" + + row = {"Opt-Mode": row_name} + + row.update(top_result.get("hyperparameters", {})) + row.update(top_result.get("metrics", {})) + + table.append(row) + + return table + + # Function to gauge pareto frontier + @staticmethod + def is_pareto_efficient(points, maximize): + data = points.copy() + for i, m in enumerate(maximize): + if m: + data[:, i] = -data[:, i] + n = data.shape[0] + is_efficient = np.ones(n, dtype=bool) + for i, c in enumerate(data): + if is_efficient[i]: + is_efficient[is_efficient] = np.any(data[is_efficient] < c, axis=1) + is_efficient[i] = True + return is_efficient + + + +# if __name__ == "__main__": +# # Your data (as a Python list of dicts) +# data = [ +# { +# "iteration": 1, +# "metrics": { +# "accuracy": 0.2371, +# "ece": 0.0963, +# "ape": 1.7475, +# "flops": 1917120, +# "score": -0.54116058430776 +# }, +# "hyperparameters": { +# "dropout_rate": 0.4, +# "p_rate": 0.95, +# "num_bayes_layer": 3, +# "scale_factor": 0.6 +# } +# }, +# { +# "iteration": 2, +# "metrics": { +# "accuracy": 0.1786, +# "ece": 0.0174, +# "ape": 1.9989, +# "flops": 3931170, +# "score": 0.049543874625775336 +# }, +# "hyperparameters": { +# "dropout_rate": 0.95, +# "p_rate": 0.7, +# "num_bayes_layer": 1, +# "scale_factor": 0.9 +# } +# }, +# { +# "iteration": 3, +# "metrics": { +# "accuracy": 0.5984, +# "ece": 0.0336, +# "ape": 1.6632, +# "flops": 2506662, +# "score": 0.031364363946693244 +# }, +# "hyperparameters": { +# "dropout_rate": 0.2, +# "p_rate": 0.1, +# "num_bayes_layer": 2, +# "scale_factor": 0.7 +# } +# }, +# { +# "iteration": 4, +# "metrics": { +# "accuracy": 0.2098, +# "ece": 0.0558, +# "ape": 2.0483, +# "flops": 452435, +# "score": -0.09194228433872143 +# }, +# "hyperparameters": { +# "dropout_rate": 0.45, +# "p_rate": 0.3, +# "num_bayes_layer": 1, +# "scale_factor": 0.25 +# } +# }, +# { +# "iteration": 5, +# "metrics": { +# "accuracy": 0.1756, +# "ece": 0.0206, +# "ape": 1.8578, +# "flops": 4336525, +# "score": -0.020782943634756024 +# }, +# "hyperparameters": { +# "dropout_rate": 0.95, +# "p_rate": 0.7, +# "num_bayes_layer": 1, +# "scale_factor": 0.95 +# } +# } +# ] + +# data2 = [ +# { +# "iteration": 1, +# "metrics": { +# "accuracy": 0.2371, +# "ece": 0.0963, +# "ape": 1.7475, +# "flops": 1917120, +# "score": 0.08987163008371235 +# }, +# "hyperparameters": { +# "dropout_rate": 0.4, +# "p_rate": 0.95, +# "num_bayes_layer": 3, +# "scale_factor": 0.6 +# } +# }, +# { +# "iteration": 2, +# "metrics": { +# "accuracy": 0.1786, +# "ece": 0.0174, +# "ape": 1.9989, +# "flops": 3931170, +# "score": -0.04873389196493308 +# }, +# "hyperparameters": { +# "dropout_rate": 0.95, +# "p_rate": 0.7, +# "num_bayes_layer": 1, +# "scale_factor": 0.9 +# } +# }, +# { +# "iteration": 3, +# "metrics": { +# "accuracy": 0.5984, +# "ece": 0.0336, +# "ape": 1.6632, +# "flops": 2506662, +# "score": 0.3359844627309281 +# }, +# "hyperparameters": { +# "dropout_rate": 0.2, +# "p_rate": 0.1, +# "num_bayes_layer": 2, +# "scale_factor": 0.7 +# } +# }, +# { +# "iteration": 4, +# "metrics": { +# "accuracy": 0.7402, +# "ece": 0.0244, +# "ape": 1.7673, +# "flops": 1644813, +# "score": 0.4837559904213181 +# }, +# "hyperparameters": { +# "dropout_rate": 0.05, +# "p_rate": 0.05, +# "num_bayes_layer": 2, +# "scale_factor": 0.55 +# } +# }, +# { +# "iteration": 5, +# "metrics": { +# "accuracy": 0.1976, +# "ece": 0.0713, +# "ape": 2.2282, +# "flops": 51575, +# "score": 0.1472824965843925 +# }, +# "hyperparameters": { +# "dropout_rate": 0.05, +# "p_rate": 0.05, +# "num_bayes_layer": 3, +# "scale_factor": 0.05 +# } +# } +# ] + +# data_dict = {0: data} +# data_dict[1] = data2 + +# table = [] + +# strategies = [ +# { +# "top_n": 3, +# "top_metric": "score", +# "name": "Opt-Balance" +# }, { +# "top_n": 1, +# "top_metric": "flops", +# "name": "Opt-Efficiency" +# } +# ] + +# for strat_index, strat_results in data_dict.items(): +# strat_obj = strategies[strat_index] + +# # just log the top n results according to the top metric in the table for particular strategy +# for i, top_result in enumerate(nlargest(strat_obj["top_n"], strat_results, key=lambda r: r["metrics"][strat_obj["top_metric"]])): +# row_name = strat_obj["name"] + +# # add top name only if we keep track of multiple top results +# if strat_obj["top_n"] > 1: +# row_name = f"{row_name}-Top{i+1}" + +# row = {"Opt-Mode": row_name} + +# row.update(top_result.get("hyperparameters", {})) +# row.update(top_result.get("metrics", {})) + +# table.append(row) + + + +# plot_pareto_3d(data_dict, table) + + + + + + diff --git a/tasks/strategy/strategy.py b/tasks/strategy/strategy.py new file mode 100644 index 0000000..b1af6ad --- /dev/null +++ b/tasks/strategy/strategy.py @@ -0,0 +1,35 @@ +from nautic import taskx + +class Strategy: + + @taskx + def initialise_strategies(ctx): + strat = ctx.strategy + # set the actual strategy dict for each strategy string reference inside of bo.strategies + # NOTE: could make AliasRef accept dictionaries instead + strat.strategies = [getattr(ctx, s) for s in strat.strategies] + + strat.curr_strategy = 0 + strat.terminate_strategies = Strategy.terminate_strategy(strat) + strat.results = {} + + @taskx + def next_strategy(ctx): + strat = ctx.strategy + if not Strategy.terminate_strategy(strat): + strat.curr_strategy += 1 + + strat.terminate_strategies = Strategy.terminate_strategy(strat) + + @taskx + def save_results(ctx): + strat = ctx.strategy + strat.results[strat.curr_strategy] = strat.curr_results.get() + + @staticmethod + def terminate_strategy(strat): + return strat.curr_strategy >= len(strat.strategies) + + @staticmethod + def get_curr_strategy_object(ctx): + return ctx.strategy.strategies[ctx.strategy.curr_strategy] \ No newline at end of file diff --git a/test_gpu.py b/test_gpu.py index e9f004e..0145677 100644 --- a/test_gpu.py +++ b/test_gpu.py @@ -13,3 +13,4 @@ print("Result of matrix multiplication on GPU:\n", c.numpy()) else: print("No GPU found. Running on CPU.") +