diff --git a/.github/workflows/root-ci-config/buildconfig/global.txt b/.github/workflows/root-ci-config/buildconfig/global.txt index 41becbb38f9f0..5d4e4cbf49030 100644 --- a/.github/workflows/root-ci-config/buildconfig/global.txt +++ b/.github/workflows/root-ci-config/buildconfig/global.txt @@ -90,7 +90,7 @@ testing=ON tmva-cpu=ON tmva-gpu=OFF tmva-cudnn=OFF -tmva-pymva=OFF +tmva-pymva=ON tmva-rmva=OFF tmva-sofie=OFF tmva=ON diff --git a/README/ReleaseNotes/v640/index.md b/README/ReleaseNotes/v640/index.md index 76d020ddf2f4f..c597e47d2aebc 100644 --- a/README/ReleaseNotes/v640/index.md +++ b/README/ReleaseNotes/v640/index.md @@ -38,6 +38,7 @@ The following people have contributed to this new version: * Comparing C++ `nullptr` objects with `None` in Python now raises a `TypeError`, as announced in the ROOT 6.38 release notes. Use truth-value checks like `if not x` or `x is None` instead. * The `TGLIncludes.h` and `TGLWSIncludes.h` that were deprecated in ROOT 6.38 and scheduled for removal are gone now. Please include your required headers like `` or `` directly. * The GLEW headers (`GL/eglew.h`, `GL/glew.h`, `GL/glxew.h`, and `GL/wglew.h`) that were installed when building ROOT with `builtin_glew=ON` are no longer installed. This is done because ROOT is moving away from GLEW for loading OpenGL extensions. +* The TMVA `PyKeras` method is deprecated. It was broken by the API changes in Keras 3, which was released in November 2023 and is part of TensorFlow 2.16 and newer. The `PyKeras` method will be removed in ROOT 6.42 (unless an updated implementation for Keras 3 that meets the usage, performance, and stability requirements is unexpectedly found). ## Build System diff --git a/tmva/pymva/CMakeLists.txt b/tmva/pymva/CMakeLists.txt index 89ed2de8f754f..119eeccb9bb78 100644 --- a/tmva/pymva/CMakeLists.txt +++ b/tmva/pymva/CMakeLists.txt @@ -14,14 +14,12 @@ ROOT_STANDARD_LIBRARY_PACKAGE(PyMVA HEADERS TMVA/MethodPyAdaBoost.h TMVA/MethodPyGTB.h - TMVA/MethodPyKeras.h TMVA/MethodPyRandomForest.h TMVA/MethodPyTorch.h TMVA/PyMethodBase.h SOURCES src/MethodPyAdaBoost.cxx src/MethodPyGTB.cxx - src/MethodPyKeras.cxx src/MethodPyRandomForest.cxx src/MethodPyTorch.cxx src/PyMethodBase.cxx diff --git a/tmva/pymva/inc/LinkDef.h b/tmva/pymva/inc/LinkDef.h index 5082dc8949bf0..c8888bf045536 100644 --- a/tmva/pymva/inc/LinkDef.h +++ b/tmva/pymva/inc/LinkDef.h @@ -13,6 +13,5 @@ #pragma link C++ class TMVA::MethodPyRandomForest+; #pragma link C++ class TMVA::MethodPyAdaBoost+; #pragma link C++ class TMVA::MethodPyGTB+; -#pragma link C++ class TMVA::MethodPyKeras+; #pragma link C++ class TMVA::MethodPyTorch+; #endif diff --git a/tmva/pymva/inc/TMVA/MethodPyKeras.h b/tmva/pymva/inc/TMVA/MethodPyKeras.h deleted file mode 100644 index 2e05089431b02..0000000000000 --- a/tmva/pymva/inc/TMVA/MethodPyKeras.h +++ /dev/null @@ -1,118 +0,0 @@ -// @(#)root/tmva/pymva $Id$ -// Author: Stefan Wunsch - -/********************************************************************************** - * Project: TMVA - a Root-integrated toolkit for multivariate data analysis * - * Package: TMVA * - * Class : MethodPyKeras * - * * - * * - * Description: * - * Interface for Keras python package which is a wrapper for the Theano and * - * Tensorflow libraries * - * * - * Authors (alphabetical): * - * Stefan Wunsch - KIT, Germany * - * * - * Copyright (c) 2016: * - * CERN, Switzerland * - * KIT, Germany * - * * - * Redistribution and use in source and binary forms, with or without * - * modification, are permitted according to the terms listed in
LICENSE * - * (see tmva/doc/LICENSE) * - **********************************************************************************/ - -#ifndef ROOT_TMVA_MethodPyKeras -#define ROOT_TMVA_MethodPyKeras - -#include "TMVA/PyMethodBase.h" -#include - -namespace TMVA { - - class MethodPyKeras : public PyMethodBase { - - public : - - // constructors - MethodPyKeras(const TString &jobName, - const TString &methodTitle, - DataSetInfo &dsi, - const TString &theOption = ""); - MethodPyKeras(DataSetInfo &dsi, - const TString &theWeightFile); - ~MethodPyKeras(); - - void Train() override; - void Init() override; - void DeclareOptions() override; - void ProcessOptions() override; - - // Check whether the given analysis type (regression, classification, ...) - // is supported by this method - Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t) override; - // Get signal probability of given event - Double_t GetMvaValue(Double_t *errLower, Double_t *errUpper) override; - std::vector GetMvaValues(Long64_t firstEvt, Long64_t lastEvt, Bool_t logProgress) override; - // Get regression values of given event - std::vector& GetRegressionValues() override; - // Get class probabilities of given event - std::vector& GetMulticlassValues() override; - - const Ranking *CreateRanking() override { return nullptr; } - void TestClassification() override; - void AddWeightsXMLTo(void*) const override{} - void ReadWeightsFromXML(void*) override{} - void ReadWeightsFromStream(std::istream&) override {} // backward compatibility - void ReadWeightsFromStream(TFile&) override{} // backward compatibility - void ReadModelFromFile() override; - - void GetHelpMessage() const override; - - /// enumeration defining the used Keras backend - enum EBackendType { kUndefined = -1, kTensorFlow = 0, kTheano = 1, kCNTK = 2 }; - - /// Get the Keras backend (can be: TensorFlow, Theano or CNTK) - EBackendType GetKerasBackend(); - TString GetKerasBackendName(); - // flag to indicate we are using the Keras shipped with Tensorflow 2 - Bool_t UseTFKeras() const { return fUseTFKeras; } - - private: - - TString fFilenameModel; // Filename of the previously exported Keras model - UInt_t fBatchSize {0}; // Training batch size - UInt_t fNumEpochs {0}; // Number of training epochs - Int_t fNumThreads {0}; // Number of CPU threads (if 0 uses default values) - Int_t fVerbose; // Keras verbosity during training - Bool_t fUseTFKeras { true}; // use Keras from Tensorflow default is true - Bool_t fContinueTraining; // Load weights from previous training - Bool_t fSaveBestOnly; // Store only weights with smallest validation loss - Int_t fTriesEarlyStopping; // Stop training if validation loss is not decreasing for several epochs - TString fLearningRateSchedule; // Set new learning rate at specific epochs - TString fTensorBoard; // Store log files during training - TString fNumValidationString; // option string defining the number of validation events - TString fGpuOptions; // GPU options (for Tensorflow to set in session_config.gpu_options) - TString fUserCodeName; // filename of an optional user script that will be executed before loading the Keras model - TString fKerasString; // string identifying keras or tf.keras - - bool fModelIsSetup = false; // flag whether current model is setup for being used - bool fModelIsSetupForEval = false; // flag to indicate whether model is setup for evaluation - std::vector fVals; // variables array used for GetMvaValue - std::vector fOutput; // probability or regression output array used for GetMvaValue - 
UInt_t fNVars {0}; // number of variables - UInt_t fNOutputs {0}; // number of outputs (classes or targets) - TString fFilenameTrainedModel; // output filename for trained model - - void InitKeras(); // initialize Keras (importing the readed modules) - void SetupKerasModel(Bool_t loadTrainedModel); // setups the needed variables, loads the model - void SetupKerasModelForEval(); // optimizes model for evaluation - UInt_t GetNumValidationSamples(); // get number of validation events according to given option - - ClassDefOverride(MethodPyKeras, 0); - }; - -} // namespace TMVA - -#endif // ROOT_TMVA_MethodPyKeras diff --git a/tmva/pymva/src/MethodPyKeras.cxx b/tmva/pymva/src/MethodPyKeras.cxx deleted file mode 100644 index 38ee5d5185953..0000000000000 --- a/tmva/pymva/src/MethodPyKeras.cxx +++ /dev/null @@ -1,863 +0,0 @@ -// @(#)root/tmva/pymva $Id$ -// Author: Stefan Wunsch, 2016 - -#include -#include "TMVA/MethodPyKeras.h" - -#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION -#include - -#include "TMVA/Types.h" -#include "TMVA/Config.h" -#include "TMVA/ClassifierFactory.h" -#include "TMVA/Results.h" -#include "TMVA/TransformationHandler.h" -#include "TMVA/VariableTransformBase.h" -#include "TMVA/Tools.h" -#include "TMVA/Timer.h" -#include "TObjString.h" -#include "TSystem.h" -#include "Math/Util.h" - -using namespace TMVA; - -namespace TMVA { -namespace Internal { -class PyGILRAII { - PyGILState_STATE m_GILState; - -public: - PyGILRAII() : m_GILState(PyGILState_Ensure()) {} - ~PyGILRAII() { PyGILState_Release(m_GILState); } -}; -} // namespace Internal -} // namespace TMVA - -REGISTER_METHOD(PyKeras) - - -MethodPyKeras::MethodPyKeras(const TString &jobName, const TString &methodTitle, DataSetInfo &dsi, const TString &theOption) - : PyMethodBase(jobName, Types::kPyKeras, methodTitle, dsi, theOption) { - fNumEpochs = 10; - fNumThreads = 0; - fBatchSize = 100; - fVerbose = 1; - fContinueTraining = false; - fSaveBestOnly = true; - fTriesEarlyStopping = -1; - fLearningRateSchedule = ""; // empty string deactivates learning rate scheduler - fFilenameTrainedModel = ""; // empty string sets output model filename to default (in weights/) - fTensorBoard = ""; // empty string deactivates TensorBoard callback -} - -MethodPyKeras::MethodPyKeras(DataSetInfo &theData, const TString &theWeightFile) - : PyMethodBase(Types::kPyKeras, theData, theWeightFile) { - fNumEpochs = 10; - fNumThreads = 0; - fBatchSize = 100; - fVerbose = 1; - fContinueTraining = false; - fSaveBestOnly = true; - fTriesEarlyStopping = -1; - fLearningRateSchedule = ""; // empty string deactivates learning rate scheduler - fFilenameTrainedModel = ""; // empty string sets output model filename to default (in weights/) - fTensorBoard = ""; // empty string deactivates TensorBoard callback -} - -MethodPyKeras::~MethodPyKeras() { -} - -Bool_t MethodPyKeras::HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t) { - if (type == Types::kRegression) return kTRUE; - if (type == Types::kClassification && numberClasses == 2) return kTRUE; - if (type == Types::kMulticlass && numberClasses >= 2) return kTRUE; - return kFALSE; -} - -/////////////////////////////////////////////////////////////////////////////// - -void MethodPyKeras::DeclareOptions() { - DeclareOptionRef(fFilenameModel, "FilenameModel", "Filename of the initial Keras model"); - DeclareOptionRef(fFilenameTrainedModel, "FilenameTrainedModel", "Filename of the trained output Keras model"); - DeclareOptionRef(fBatchSize, "BatchSize", "Training batch size"); - 
DeclareOptionRef(fNumEpochs, "NumEpochs", "Number of training epochs"); - DeclareOptionRef(fNumThreads, "NumThreads", "Number of CPU threads (only for Tensorflow backend)"); - DeclareOptionRef(fGpuOptions, "GpuOptions", "GPU options for tensorflow, such as allow_growth"); - DeclareOptionRef(fUseTFKeras, "tf.keras", "Use tensorflow from Keras"); - DeclareOptionRef(fUseTFKeras, "tfkeras", "Use tensorflow from Keras"); - DeclareOptionRef(fVerbose, "Verbose", "Keras verbosity during training"); - DeclareOptionRef(fContinueTraining, "ContinueTraining", "Load weights from previous training"); - DeclareOptionRef(fSaveBestOnly, "SaveBestOnly", "Store only weights with smallest validation loss"); - DeclareOptionRef(fTriesEarlyStopping, "TriesEarlyStopping", "Number of epochs with no improvement in validation loss after which training will be stopped. The default or a negative number deactivates this option."); - DeclareOptionRef(fLearningRateSchedule, "LearningRateSchedule", "Set new learning rate during training at specific epochs, e.g., \"50,0.01;70,0.005\""); - DeclareOptionRef(fTensorBoard, "TensorBoard", - "Write a log during training to visualize and monitor the training performance with TensorBoard"); - - DeclareOptionRef(fNumValidationString = "20%", "ValidationSize", "Part of the training data to use for validation. " - "Specify as 0.2 or 20% to use a fifth of the data set as validation set. " - "Specify as 100 to use exactly 100 events. (Default: 20%)"); - DeclareOptionRef(fUserCodeName = "", "UserCode", - "Optional python code provided by the user to be executed before loading the Keras model"); -} - -//////////////////////////////////////////////////////////////////////////////// -/// Validation of the ValidationSize option. Allowed formats are 20%, 0.2 and -/// 100 etc. -/// - 20% and 0.2 selects 20% of the training set as validation data. -/// - 100 selects 100 events as the validation data. -/// -/// @return number of samples in validation set -/// -UInt_t TMVA::MethodPyKeras::GetNumValidationSamples() -{ - Int_t nValidationSamples = 0; - UInt_t trainingSetSize = GetEventCollection(Types::kTraining).size(); - - // Parsing + Validation - // -------------------- - if (fNumValidationString.EndsWith("%")) { - // Relative spec. format 20% - TString intValStr = TString(fNumValidationString.Strip(TString::kTrailing, '%')); - - if (intValStr.IsFloat()) { - Double_t valSizeAsDouble = fNumValidationString.Atof() / 100.0; - nValidationSamples = GetEventCollection(Types::kTraining).size() * valSizeAsDouble; - } else { - Log() << kFATAL << "Cannot parse number \"" << fNumValidationString - << "\". Expected string like \"20%\" or \"20.0%\"." << Endl; - } - } else if (fNumValidationString.IsFloat()) { - Double_t valSizeAsDouble = fNumValidationString.Atof(); - - if (valSizeAsDouble < 1.0) { - // Relative spec. format 0.2 - nValidationSamples = GetEventCollection(Types::kTraining).size() * valSizeAsDouble; - } else { - // Absolute spec format 100 or 100.0 - nValidationSamples = valSizeAsDouble; - } - } else { - Log() << kFATAL << "Cannot parse number \"" << fNumValidationString << "\". Expected string like \"0.2\" or \"100\"." - << Endl; - } - - // Value validation - // ---------------- - if (nValidationSamples < 0) { - Log() << kFATAL << "Validation size \"" << fNumValidationString << "\" is negative." << Endl; - } - - if (nValidationSamples == 0) { - Log() << kFATAL << "Validation size \"" << fNumValidationString << "\" is zero." 
<< Endl; - } - - if (nValidationSamples >= (Int_t)trainingSetSize) { - Log() << kFATAL << "Validation size \"" << fNumValidationString - << "\" is larger than or equal in size to training set (size=\"" << trainingSetSize << "\")." << Endl; - } - - return nValidationSamples; -} - -/// Function processing the options -/// This is called only when creating the method before training not when -/// reading from XML file. Called from MethodBase::ProcessSetup -/// that is called from Factory::BookMethod -void MethodPyKeras::ProcessOptions() { - - // Set default filename for trained model if option is not used - if (fFilenameTrainedModel.IsNull()) { - fFilenameTrainedModel = GetWeightFileDir() + "/TrainedModel_" + GetName() + ".h5"; - } - - InitKeras(); - - // Setup model, either the initial model from `fFilenameModel` or - // the trained model from `fFilenameTrainedModel` - if (fContinueTraining) Log() << kINFO << "Continue training with trained model" << Endl; - SetupKerasModel(fContinueTraining); -} - -void MethodPyKeras::InitKeras() { - // initialize first Keras. This is done only here when class has - // all state variable set from options or read from XML file - // Import Keras - - if (fUseTFKeras) - Log() << kINFO << "Setting up tf.keras" << Endl; - else - Log() << kINFO << "Setting up keras with " << gSystem->Getenv("KERAS_BACKEND") << " backend" << Endl; - - bool useTFBackend = kFALSE; - bool kerasIsCompatible = kTRUE; - bool kerasIsPresent = kFALSE; - - if (!fUseTFKeras) { - auto ret = PyRun_String("import keras", Py_single_input, fGlobalNS, fLocalNS); - // need importing also in global namespace - if (ret != nullptr) ret = PyRun_String("import keras", Py_single_input, fGlobalNS, fGlobalNS); - if (ret != nullptr) - kerasIsPresent = kTRUE; - if (kerasIsPresent) { - // check compatibility with tensorflow - if (GetKerasBackend() == kTensorFlow ) { - useTFBackend = kTRUE; - - PyRunString("keras_major_version = int(keras.__version__.split('.')[0])"); - PyRunString("keras_minor_version = int(keras.__version__.split('.')[1])"); - PyObject *pyKerasMajorVersion = PyDict_GetItemString(fLocalNS, "keras_major_version"); - PyObject *pyKerasMinorVersion = PyDict_GetItemString(fLocalNS, "keras_minor_version"); - int kerasMajorVersion = PyLong_AsLong(pyKerasMajorVersion); - int kerasMinorVersion = PyLong_AsLong(pyKerasMinorVersion); - Log() << kINFO << "Using Keras version " << kerasMajorVersion << "." << kerasMinorVersion << Endl; - // only version 2.3 is latest multi-backend version. - // version 2.4 is just tf.keras and should not be used in standalone and will not work in this workflow - // see https://github.com/keras-team/keras/releases/tag/2.4.0 - // for example variable keras.backend.tensorflow_backend will not exist anymore in keras 2.4 - kerasIsCompatible = (kerasMajorVersion >= 2 && kerasMinorVersion == 3); - - } - } else { - // Keras is not found. try tyo use tf.keras - Log() << kINFO << "Keras is not found. 
Trying using tf.keras" << Endl; - fUseTFKeras = 1; - } - } - - // import Tensoprflow (if requested or because is keras backend) - if (fUseTFKeras || useTFBackend) { - auto ret = PyRun_String("import tensorflow as tf", Py_single_input, fGlobalNS, fLocalNS); - if (ret != nullptr) ret = PyRun_String("import tensorflow as tf", Py_single_input, fGlobalNS, fGlobalNS); - if (ret == nullptr) { - Log() << kFATAL << "Importing TensorFlow failed" << Endl; - } - // check tensorflow version - PyRunString("tf_major_version = int(tf.__version__.split('.')[0])"); - PyObject *pyTfVersion = PyDict_GetItemString(fLocalNS, "tf_major_version"); - int tfVersion = PyLong_AsLong(pyTfVersion); - Log() << kINFO << "Using TensorFlow version " << tfVersion << Endl; - - if (tfVersion < 2) { - if (fUseTFKeras == 1) { - Log() << kWARNING << "Using TensorFlow version 1.x which does not contain tf.keras - use then TensorFlow as Keras backend" << Endl; - fUseTFKeras = kFALSE; - // case when Keras was not found - if (!kerasIsPresent) { - Log() << kFATAL << "Keras is not present and not a suitable TensorFlow version is found " << Endl; - return; - } - } - } - else { - // using version larger than 2.0 - can use tf.keras - if (!kerasIsCompatible) { - Log() << kWARNING << "The Keras version is not compatible with TensorFlow 2. Use instead tf.keras" << Endl; - fUseTFKeras = 1; - } - } - - // if keras 2.3 and tensorflow 2 are found. Use tf.keras or keras ? - // at the moment default is tf.keras=false to keep compatibility - // but this might change in future releases - if (fUseTFKeras) { - Log() << kINFO << "Use Keras version from TensorFlow : tf.keras" << Endl; - fKerasString = "tf.keras"; - PyRunString("K = tf.keras.backend"); - PyRun_String("K = tf.keras.backend", Py_single_input, fGlobalNS, fGlobalNS); - } - else { - Log() << kINFO << "Use TensorFlow as Keras backend" << Endl; - fKerasString = "keras"; - PyRunString("from keras.backend import tensorflow_backend as K"); - PyRun_String("from keras.backend import tensorflow_backend as K", Py_single_input, fGlobalNS, fGlobalNS); - } - - // extra options for tensorflow - // use different naming in tf2 for ConfigProto and Session - TString configProto = (tfVersion >= 2) ? "tf.compat.v1.ConfigProto" : "tf.ConfigProto"; - TString session = (tfVersion >= 2) ? "tf.compat.v1.Session" : "tf.Session"; - - // in case specify number of threads - int num_threads = fNumThreads; - if (num_threads > 0) { - Log() << kINFO << "Setting the CPU number of threads = " << num_threads << Endl; - - PyRunString( - TString::Format("session_conf = %s(intra_op_parallelism_threads=%d,inter_op_parallelism_threads=%d)", - configProto.Data(), num_threads, num_threads)); - } else - PyRunString(TString::Format("session_conf = %s()", configProto.Data())); - - // applying GPU options such as allow_growth=True to avoid allocating all memory on GPU - // that prevents running later TMVA-GPU - // Also new Nvidia RTX cards (e.g. RTX 2070) require this option - if (!fGpuOptions.IsNull()) { - TObjArray *optlist = fGpuOptions.Tokenize(","); - for (int item = 0; item < optlist->GetEntries(); ++item) { - Log() << kINFO << "Applying GPU option: gpu_options." 
<< optlist->At(item)->GetName() << Endl; - PyRunString(TString::Format("session_conf.gpu_options.%s", optlist->At(item)->GetName())); - } - } - PyRunString(TString::Format("sess = %s(config=session_conf)", session.Data())); - - if (tfVersion < 2) { - PyRunString("K.set_session(sess)"); - } else { - PyRunString("tf.compat.v1.keras.backend.set_session(sess)"); - } - } - // case not using a Tensorflow backend - else { - fKerasString = "keras"; - if (fNumThreads > 0) - Log() << kWARNING << "Cannot set the given " << fNumThreads << " threads when not using tensorflow as backend" - << Endl; - if (!fGpuOptions.IsNull()) { - Log() << kWARNING << "Cannot set the given GPU option " << fGpuOptions - << " when not using tensorflow as backend" << Endl; - } - } - -} - -void MethodPyKeras::SetupKerasModel(bool loadTrainedModel) { - /* - * Load Keras model from file - */ - - Log() << kINFO << " Loading Keras Model " << Endl; - - PyRunString("load_model_custom_objects=None"); - - - - if (!fUserCodeName.IsNull()) { - Log() << kINFO << " Executing user initialization code from " << fUserCodeName << Endl; - - - // run some python code provided by user for model initialization if needed - TString cmd = "exec(open('" + fUserCodeName + "').read())"; - TString errmsg = "Error executing the provided user code"; - PyRunString(cmd, errmsg); - - PyRunString("print('custom objects for loading model : ',load_model_custom_objects)"); - } - - // Load initial model or already trained model - TString filenameLoadModel; - if (loadTrainedModel) { - filenameLoadModel = fFilenameTrainedModel; - } - else { - filenameLoadModel = fFilenameModel; - } - - PyRunString("model = " + fKerasString + ".models.load_model('" + filenameLoadModel + - "', custom_objects=load_model_custom_objects)", "Failed to load Keras model from file: " + filenameLoadModel); - - Log() << kINFO << "Loaded model from file: " << filenameLoadModel << Endl; - - - /* - * Init variables and weights - */ - - // Get variables, classes and target numbers - fNVars = GetNVariables(); - if (GetAnalysisType() == Types::kClassification || GetAnalysisType() == Types::kMulticlass) fNOutputs = DataInfo().GetNClasses(); - else if (GetAnalysisType() == Types::kRegression) fNOutputs = DataInfo().GetNTargets(); - else Log() << kFATAL << "Selected analysis type is not implemented" << Endl; - - // Mark the model as setup - fModelIsSetup = true; - fModelIsSetupForEval = false; -} - -///Setting up model for evaluation -/// Add here some needed optimizations like disabling eager execution -void MethodPyKeras::SetupKerasModelForEval() { - - InitKeras(); - - // disable eager execution (model will evaluate > 100 faster) - // need to be done before loading the model -#ifndef R__MACOSX // problem siabling eager execution on Macos (conflict with multiprocessing) - if (fUseTFKeras){ - PyRunString("tf.compat.v1.disable_eager_execution()","Failed to disable eager execution"); - Log() << kINFO << "Disabled TF eager execution when evaluating model " << Endl; - } -#endif - - SetupKerasModel(true); - - // Init evaluation (needed for getMvaValue) - if (fNVars > 0) { - fVals.resize(fNVars); // holds values used for classification and regression - npy_intp dimsVals[2] = {(npy_intp)1, (npy_intp)fNVars}; - PyArrayObject* pVals = (PyArrayObject*)PyArray_SimpleNewFromData(2, dimsVals, NPY_FLOAT, (void*)fVals.data()); - PyDict_SetItemString(fLocalNS, "vals", (PyObject*)pVals); - } - // setup output variables - if (fNOutputs > 0) { - fOutput.resize(fNOutputs); // holds classification probabilities or 
regression output - npy_intp dimsOutput[2] = {(npy_intp)1, (npy_intp)fNOutputs}; - PyArrayObject* pOutput = (PyArrayObject*)PyArray_SimpleNewFromData(2, dimsOutput, NPY_FLOAT, (void*)fOutput.data()); - PyDict_SetItemString(fLocalNS, "output", (PyObject*)pOutput); - } - - fModelIsSetupForEval = true; -} - -/// Initialization function called from MethodBase::SetupMethod() -/// Note that option string are not yet filled with their values. -/// This is done before ProcessOption method or after reading from XML file -void MethodPyKeras::Init() { - - TMVA::Internal::PyGILRAII raii; - - if (!PyIsInitialized()) { - Log() << kFATAL << "Python is not initialized" << Endl; - } - _import_array(); // required to use numpy arrays - - // NOTE: sys.argv has to be cleared because otherwise TensorFlow breaks - PyRunString("import sys; sys.argv = ['']", "Set sys.argv failed"); - - // Set flag that model is not setup - fModelIsSetup = false; - fModelIsSetupForEval = false; -} - -void MethodPyKeras::Train() { - - if(!fModelIsSetup) Log() << kFATAL << "Model is not setup for training" << Endl; - - /* - * Load training data to numpy array - */ - - UInt_t nAllEvents = Data()->GetNTrainingEvents(); - UInt_t nValEvents = GetNumValidationSamples(); - UInt_t nTrainingEvents = nAllEvents - nValEvents; - - Log() << kINFO << "Split TMVA training data in " << nTrainingEvents << " training events and " - << nValEvents << " validation events" << Endl; - - float* trainDataX = new float[nTrainingEvents*fNVars]; - float* trainDataY = new float[nTrainingEvents*fNOutputs]; - float* trainDataWeights = new float[nTrainingEvents]; - for (UInt_t i=0; iGetValue(j); - } - // Fill targets - // NOTE: For classification, convert class number in one-hot vector, - // e.g., 1 -> [0, 1] or 0 -> [1, 0] for binary classification - if (GetAnalysisType() == Types::kClassification || GetAnalysisType() == Types::kMulticlass) { - for (UInt_t j=0; jGetClass() + i*fNOutputs] = 1; - } - else if (GetAnalysisType() == Types::kRegression) { - for (UInt_t j=0; jGetTarget(j); - } - } - else Log() << kFATAL << "Can not fill target vector because analysis type is not known" << Endl; - // Fill weights - // NOTE: If no weight branch is given, this defaults to ones for all events - trainDataWeights[i] = e->GetWeight(); - } - - npy_intp dimsTrainX[2] = {(npy_intp)nTrainingEvents, (npy_intp)fNVars}; - npy_intp dimsTrainY[2] = {(npy_intp)nTrainingEvents, (npy_intp)fNOutputs}; - npy_intp dimsTrainWeights[1] = {(npy_intp)nTrainingEvents}; - PyArrayObject* pTrainDataX = (PyArrayObject*)PyArray_SimpleNewFromData(2, dimsTrainX, NPY_FLOAT, (void*)trainDataX); - PyArrayObject* pTrainDataY = (PyArrayObject*)PyArray_SimpleNewFromData(2, dimsTrainY, NPY_FLOAT, (void*)trainDataY); - PyArrayObject* pTrainDataWeights = (PyArrayObject*)PyArray_SimpleNewFromData(1, dimsTrainWeights, NPY_FLOAT, (void*)trainDataWeights); - PyDict_SetItemString(fLocalNS, "trainX", (PyObject*)pTrainDataX); - PyDict_SetItemString(fLocalNS, "trainY", (PyObject*)pTrainDataY); - PyDict_SetItemString(fLocalNS, "trainWeights", (PyObject*)pTrainDataWeights); - - /* - * Load validation data to numpy array - */ - - // NOTE: from TMVA, we get the validation data as a subset of all the training data - // we will not use test data for validation. 
They will be used for the real testing - - - float* valDataX = new float[nValEvents*fNVars]; - float* valDataY = new float[nValEvents*fNOutputs]; - float* valDataWeights = new float[nValEvents]; - //validation events follows the trainig one in the TMVA training vector - for (UInt_t i=0; i< nValEvents ; i++) { - UInt_t ievt = nTrainingEvents + i; // TMVA event index - const TMVA::Event* e = GetTrainingEvent(ievt); - // Fill variables - for (UInt_t j=0; jGetValue(j); - } - // Fill targets - if (GetAnalysisType() == Types::kClassification || GetAnalysisType() == Types::kMulticlass) { - for (UInt_t j=0; jGetClass() + i*fNOutputs] = 1; - } - else if (GetAnalysisType() == Types::kRegression) { - for (UInt_t j=0; jGetTarget(j); - } - } - else Log() << kFATAL << "Can not fill target vector because analysis type is not known" << Endl; - // Fill weights - valDataWeights[i] = e->GetWeight(); - } - - npy_intp dimsValX[2] = {(npy_intp)nValEvents, (npy_intp)fNVars}; - npy_intp dimsValY[2] = {(npy_intp)nValEvents, (npy_intp)fNOutputs}; - npy_intp dimsValWeights[1] = {(npy_intp)nValEvents}; - PyArrayObject* pValDataX = (PyArrayObject*)PyArray_SimpleNewFromData(2, dimsValX, NPY_FLOAT, (void*)valDataX); - PyArrayObject* pValDataY = (PyArrayObject*)PyArray_SimpleNewFromData(2, dimsValY, NPY_FLOAT, (void*)valDataY); - PyArrayObject* pValDataWeights = (PyArrayObject*)PyArray_SimpleNewFromData(1, dimsValWeights, NPY_FLOAT, (void*)valDataWeights); - PyDict_SetItemString(fLocalNS, "valX", (PyObject*)pValDataX); - PyDict_SetItemString(fLocalNS, "valY", (PyObject*)pValDataY); - PyDict_SetItemString(fLocalNS, "valWeights", (PyObject*)pValDataWeights); - - /* - * Train Keras model - */ - Log() << kINFO << "Training Model Summary" << Endl; - PyRunString("model.summary()"); - - // Setup parameters - - PyObject* pBatchSize = PyLong_FromLong(fBatchSize); - PyObject* pNumEpochs = PyLong_FromLong(fNumEpochs); - PyObject* pVerbose = PyLong_FromLong(fVerbose); - PyDict_SetItemString(fLocalNS, "batchSize", pBatchSize); - PyDict_SetItemString(fLocalNS, "numEpochs", pNumEpochs); - PyDict_SetItemString(fLocalNS, "verbose", pVerbose); - - // Setup training callbacks - PyRunString("callbacks = []"); - - // Callback: Save only weights with smallest validation loss - if (fSaveBestOnly) { - PyRunString("callbacks.append(" + fKerasString +".callbacks.ModelCheckpoint('"+fFilenameTrainedModel+"', monitor='val_loss', verbose=verbose, save_best_only=True, mode='auto'))", "Failed to setup training callback: SaveBestOnly"); - Log() << kINFO << "Option SaveBestOnly: Only model weights with smallest validation loss will be stored" << Endl; - } - - // Callback: Stop training early if no improvement in validation loss is observed - if (fTriesEarlyStopping>=0) { - TString tries; - tries.Form("%i", fTriesEarlyStopping); - PyRunString("callbacks.append(" + fKerasString + ".callbacks.EarlyStopping(monitor='val_loss', patience="+tries+", verbose=verbose, mode='auto'))", "Failed to setup training callback: TriesEarlyStopping"); - Log() << kINFO << "Option TriesEarlyStopping: Training will stop after " << tries << " number of epochs with no improvement of validation loss" << Endl; - } - - // Callback: Learning rate scheduler - if (fLearningRateSchedule != "") { - // tokenize learning rate schedule (e.g. 
"10,0.01;20,0.001" given as epoch,lr) - std::vector> scheduleSteps; - auto lrSteps = fLearningRateSchedule.Tokenize(";"); - for (auto obj : *lrSteps) { - TString step = obj->GetName(); - auto x = step.Tokenize(","); - if (!x || x->GetEntries() != 2) { - Log() << kFATAL << "Invalid values given in LearningRateSchedule, it should be as \"10,0.1;20,0.01\"" - << Endl; - } - scheduleSteps.push_back(std::make_pair( std::string((*x)[0]->GetName() ) , - std::string((*x)[1]->GetName() ) ) ); - std::cout << " add learning rate schedule " << scheduleSteps.back().first << " : " << scheduleSteps.back().second << std::endl; - } - // Set scheduler function as piecewise function with given steps - TString epochsList = "epochs = ["; - TString valuesList = "lrValues = ["; - for (size_t i = 0; i < scheduleSteps.size(); i++) { - epochsList += TString(scheduleSteps[i].first.c_str()); - valuesList += TString(scheduleSteps[i].second.c_str()); - if (i < scheduleSteps.size()-1) { - epochsList += ", "; - valuesList += ", "; - } - } - epochsList += "]"; - valuesList += "]"; - TString scheduleFunction = "def schedule(epoch, lr):\n" - " i = 0\n" - " " + epochsList + "\n" - " " + valuesList + "\n" - " for e in epochs:\n" - " if (epoch < e) :\n" - " return lrValues[i]\n" - " i+=1\n" - " return lr\n"; - PyRunString( scheduleFunction, - "Failed to setup scheduler function with string: " + fLearningRateSchedule, Py_file_input); - // Setup callback - PyRunString("callbacks.append(" + fKerasString + ".callbacks.LearningRateScheduler(schedule, verbose=True))", - "Failed to setup training callback: LearningRateSchedule"); - Log() << kINFO << "Option LearningRateSchedule: Set learning rate during training: " << fLearningRateSchedule << Endl; - } - - // Callback: TensorBoard - if (fTensorBoard != "") { - TString logdir = TString("'") + fTensorBoard + TString("'"); - PyRunString( - "callbacks.append(" + fKerasString + ".callbacks.TensorBoard(log_dir=" + logdir + - ", histogram_freq=0, batch_size=batchSize, write_graph=True, write_grads=False, write_images=False))", - "Failed to setup training callback: TensorBoard"); - Log() << kINFO << "Option TensorBoard: Log files for training monitoring are stored in: " << logdir << Endl; - } - - // Train model - PyRunString("history = model.fit(trainX, trainY, sample_weight=trainWeights, batch_size=batchSize, epochs=numEpochs, verbose=verbose, validation_data=(valX, valY, valWeights), callbacks=callbacks)", - "Failed to train model"); - - - std::vector fHistory; // Hold training history (val_acc or loss etc) - fHistory.resize(fNumEpochs); // holds training loss or accuracy output - npy_intp dimsHistory[1] = { (npy_intp)fNumEpochs}; - PyArrayObject* pHistory = (PyArrayObject*)PyArray_SimpleNewFromData(1, dimsHistory, NPY_FLOAT, (void*)&fHistory[0]); - PyDict_SetItemString(fLocalNS, "HistoryOutput", (PyObject*)pHistory); - - // Store training history data - Int_t iHis=0; - PyRunString("number_of_keys=len(history.history.keys())"); - PyObject* PyNkeys=PyDict_GetItemString(fLocalNS, "number_of_keys"); - int nkeys=PyLong_AsLong(PyNkeys); - for (iHis=0; iHisGetValue(i); - int verbose = (int) Verbose(); - std::string code = "for i,p in enumerate(model.predict(vals, verbose=" + ROOT::Math::Util::ToString(verbose) - + ")): output[i]=p\n"; - PyRunString(code,"Failed to get predictions"); - - return fOutput[TMVA::Types::kSignal]; -} - -std::vector MethodPyKeras::GetMvaValues(Long64_t firstEvt, Long64_t lastEvt, Bool_t logProgress) { - // Check whether the model is setup - // NOTE: Unfortunately this is 
needed because during evaluation ProcessOptions is not called again - if (!fModelIsSetupForEval) { - // Setup the trained model - SetupKerasModelForEval(); - } - - // Load data to numpy array - Long64_t nEvents = Data()->GetNEvents(); - if (firstEvt > lastEvt || lastEvt > nEvents) lastEvt = nEvents; - if (firstEvt < 0) firstEvt = 0; - nEvents = lastEvt-firstEvt; - - // use timer - Timer timer( nEvents, GetName(), kTRUE ); - - if (logProgress) - Log() << kHEADER << Form("[%s] : ",DataInfo().GetName()) - << "Evaluation of " << GetMethodName() << " on " - << (Data()->GetCurrentType() == Types::kTraining ? "training" : "testing") - << " sample (" << nEvents << " events)" << Endl; - - float* data = new float[nEvents*fNVars]; - for (UInt_t i=0; iSetCurrentEvent(i); - const TMVA::Event *e = GetEvent(); - for (UInt_t j=0; jGetValue(j); - } - } - - std::vector mvaValues(nEvents); - npy_intp dimsData[2] = {(npy_intp)nEvents, (npy_intp)fNVars}; - PyArrayObject* pDataMvaValues = (PyArrayObject*)PyArray_SimpleNewFromData(2, dimsData, NPY_FLOAT, (void*)data); - if (pDataMvaValues==0) Log() << "Failed to load data to Python array" << Endl; - - // Get prediction for all events - PyObject* pModel = PyDict_GetItemString(fLocalNS, "model"); - if (pModel==0) Log() << kFATAL << "Failed to get model Python object" << Endl; - PyArrayObject* pPredictions = (PyArrayObject*) PyObject_CallMethod(pModel, (char*)"predict", (char*)"O", pDataMvaValues); - if (pPredictions==0) Log() << kFATAL << "Failed to get predictions" << Endl; - delete[] data; - // Load predictions to double vector - // NOTE: The signal probability is given at the output - float* predictionsData = (float*) PyArray_DATA(pPredictions); - - for (UInt_t i=0; i& MethodPyKeras::GetRegressionValues() { - // Check whether the model is setup - // NOTE: unfortunately this is needed because during evaluation ProcessOptions is not called again - if (!fModelIsSetupForEval){ - // Setup the model and load weights - //std::cout << "setup model for evaluation" << std::endl; - //PyRunString("tf.compat.v1.disable_eager_execution()","Failed to disable eager execution"); - SetupKerasModelForEval(); - } - - // Get regression values - const TMVA::Event* e = GetEvent(); - for (UInt_t i=0; iGetValue(i); - int verbose = (int) Verbose(); - std::string code = "for i,p in enumerate(model.predict(vals, verbose=" + ROOT::Math::Util::ToString(verbose) - + ")): output[i]=p\n"; - PyRunString(code,"Failed to get predictions"); - - // Use inverse transformation of targets to get final regression values - Event * eTrans = new Event(*e); - for (UInt_t i=0; iSetTarget(i,fOutput[i]); - } - - const Event* eTrans2 = GetTransformationHandler().InverseTransform(eTrans); - for (UInt_t i=0; iGetTarget(i); - } - - return fOutput; -} - -std::vector& MethodPyKeras::GetMulticlassValues() { - // Check whether the model is setup - // NOTE: unfortunately this is needed because during evaluation ProcessOptions is not called again - if (!fModelIsSetupForEval){ - // Setup the model and load weights - SetupKerasModelForEval(); - } - - // Get class probabilites - const TMVA::Event* e = GetEvent(); - for (UInt_t i=0; iGetValue(i); - int verbose = (int) Verbose(); - std::string code = "for i,p in enumerate(model.predict(vals, verbose=" + ROOT::Math::Util::ToString(verbose) - + ")): output[i]=p\n"; - PyRunString(code,"Failed to get predictions"); - - return fOutput; -} - -void MethodPyKeras::ReadModelFromFile() { -} - -void MethodPyKeras::GetHelpMessage() const { -// typical length of text line: -// 
"|--------------------------------------------------------------|" - Log() << Endl; - Log() << "Keras is a high-level API for the Theano and Tensorflow packages." << Endl; - Log() << "This method wraps the training and predictions steps of the Keras" << Endl; - Log() << "Python package for TMVA, so that dataloading, preprocessing and" << Endl; - Log() << "evaluation can be done within the TMVA system. To use this Keras" << Endl; - Log() << "interface, you have to generate a model with Keras first. Then," << Endl; - Log() << "this model can be loaded and trained in TMVA." << Endl; - Log() << Endl; -} - -MethodPyKeras::EBackendType MethodPyKeras::GetKerasBackend() { - // get the keras backend - - // in case we use tf.keras backend is tensorflow - if (UseTFKeras()) return kTensorFlow; - - // check first if using tensorflow backend - PyRunString("keras_backend_is_set = keras.backend.backend() == \"tensorflow\""); - PyObject * keras_backend = PyDict_GetItemString(fLocalNS,"keras_backend_is_set"); - if (keras_backend != nullptr && keras_backend == Py_True) - return kTensorFlow; - - PyRunString("keras_backend_is_set = keras.backend.backend() == \"theano\""); - keras_backend = PyDict_GetItemString(fLocalNS,"keras_backend_is_set"); - if (keras_backend != nullptr && keras_backend == Py_True) - return kTheano; - - PyRunString("keras_backend_is_set = keras.backend.backend() == \"cntk\""); - keras_backend = PyDict_GetItemString(fLocalNS,"keras_backend_is_set"); - if (keras_backend != nullptr && keras_backend == Py_True) - return kCNTK; - - return kUndefined; -} - -TString MethodPyKeras::GetKerasBackendName() { - // get the keras backend name - EBackendType type = GetKerasBackend(); - if (type == kTensorFlow) return "TensorFlow"; - if (type == kTheano) return "Theano"; - if (type == kCNTK) return "CNTK"; - return "Undefined"; -} diff --git a/tmva/pymva/test/CMakeLists.txt b/tmva/pymva/test/CMakeLists.txt index 52014fdd4e253..023023cf833cd 100644 --- a/tmva/pymva/test/CMakeLists.txt +++ b/tmva/pymva/test/CMakeLists.txt @@ -15,7 +15,6 @@ set(Libraries Core MathCore TMVA PyMVA) # Look for needed python modules ROOT_FIND_PYTHON_MODULE(torch) -ROOT_FIND_PYTHON_MODULE(keras) ROOT_FIND_PYTHON_MODULE(tensorflow) ROOT_FIND_PYTHON_MODULE(sklearn) @@ -81,32 +80,3 @@ if(ROOT_TORCH_FOUND) ROOT_ADD_TEST(PyMVA-Torch-Multiclass COMMAND testPyTorchMulticlass DEPENDS ${PyMVA-Torch-Multiclass-depends}) endif(ROOT_TORCH_FOUND) - -if((ROOT_KERAS_FOUND AND ROOT_THEANO_FOUND) OR (ROOT_KERAS_FOUND AND ROOT_TENSORFLOW_FOUND)) - - if (ROOT_TORCH_FOUND) - set(PyMVA-Keras-Classification-depends PyMVA-Torch-Classification) - set(PyMVA-Keras-Regression-depends PyMVA-Torch-Regression) - set(PyMVA-Keras-Multiclass-depends PyMVA-Torch-Multiclass) - endif() - - - # Test PyKeras: Binary classification - ROOT_EXECUTABLE(testPyKerasClassification testPyKerasClassification.C - LIBRARIES ${Libraries}) - ROOT_ADD_TEST(PyMVA-Keras-Classification COMMAND testPyKerasClassification DEPENDS ${PyMVA-Keras-Classification-depends}) - - # Test PyKeras: Regression - if (NOT ROOT_ARCHITECTURE MATCHES macosx) - #veto also keras tutorial on macos due to issue in disabling eager execution on macos - ROOT_EXECUTABLE(testPyKerasRegression testPyKerasRegression.C - LIBRARIES ${Libraries}) - ROOT_ADD_TEST(PyMVA-Keras-Regression COMMAND testPyKerasRegression DEPENDS ${PyMVA-Keras-Regression-depends}) - endif() - - - # Test PyKeras: Multi-class classification - ROOT_EXECUTABLE(testPyKerasMulticlass testPyKerasMulticlass.C - LIBRARIES ${Libraries}) - 
ROOT_ADD_TEST(PyMVA-Keras-Multiclass COMMAND testPyKerasMulticlass DEPENDS ${PyMVA-Keras-Multiclass-depends}) -endif() diff --git a/tmva/pymva/test/testPyKerasClassification.C b/tmva/pymva/test/testPyKerasClassification.C deleted file mode 100644 index 93df6a6da2037..0000000000000 --- a/tmva/pymva/test/testPyKerasClassification.C +++ /dev/null @@ -1,137 +0,0 @@ -#include - -#include "TString.h" -#include "TFile.h" -#include "TTree.h" -#include "TSystem.h" -#include "TROOT.h" -#include "TMVA/Factory.h" -#include "TMVA/Reader.h" -#include "TMVA/DataLoader.h" -#include "TMVA/PyMethodBase.h" - -TString pythonSrc = "\ -from tensorflow.keras.models import Sequential\n\ -from tensorflow.keras.layers import Dense, Activation\n\ -from tensorflow.keras.optimizers import Adam\n\ -\n\ -model = Sequential()\n\ -model.add(Dense(64, activation=\"relu\", input_dim=4))\n\ -model.add(Dense(2, activation=\"softmax\"))\n\ -model.compile(loss=\"categorical_crossentropy\", optimizer=\"Adam\", weighted_metrics=[\"accuracy\",])\n\ -model.save(\"kerasModelClassification.h5\")\n"; - -int testPyKerasClassification(){ - - // Get data file - std::cout << "Get test data..." << std::endl; - TString fname = gROOT->GetTutorialDir() + "/machine_learning/data/tmva_class_example.root"; - TFile *input = TFile::Open(fname); - if (!input) { - std::cout << "ERROR: could not open data file " << fname << std::endl; - return 1; - } - - // Build model from python file - if (gSystem->AccessPathName("kerasModelClassification.h5")) { - std::cout << "Generate keras model..." << std::endl; - UInt_t ret; - ret = gSystem->Exec("echo '"+pythonSrc+"' > generateKerasModelClassification.py"); - if(ret!=0){ - std::cout << "[ERROR] Failed to write python code to file" << std::endl; - return 1; - } - ret = gSystem->Exec(TMVA::Python_Executable() + " generateKerasModelClassification.py"); - if(ret!=0){ - std::cout << "[ERROR] Failed to generate model using python" << std::endl; - return 1; - } - } - // Setup PyMVA and factory - std::cout << "Setup TMVA..." << std::endl; - TMVA::PyMethodBase::PyInitialize(); - TFile* outputFile = TFile::Open("ResultsTestPyKerasClassification.root", "RECREATE"); - TMVA::Factory *factory = new TMVA::Factory("testPyKerasClassification", outputFile, - "!V:Silent:Color:!DrawProgressBar:AnalysisType=Classification"); - - // Load data - TMVA::DataLoader *dataloader = new TMVA::DataLoader("datasetTestPyKerasClassification"); - - TTree *signal = (TTree*)input->Get("TreeS"); - TTree *background = (TTree*)input->Get("TreeB"); - dataloader->AddSignalTree(signal); - dataloader->AddBackgroundTree(background); - - dataloader->AddVariable("var1"); - dataloader->AddVariable("var2"); - dataloader->AddVariable("var3"); - dataloader->AddVariable("var4"); - - dataloader->PrepareTrainingAndTestTree("", - "SplitMode=Random:NormMode=NumEvents:!V"); - - // Book and train method - factory->BookMethod(dataloader, TMVA::Types::kPyKeras, "PyKeras", - "!H:!V:VarTransform=D,G:FilenameModel=kerasModelClassification.h5:FilenameTrainedModel=trainedKerasModelClassification.h5:NumEpochs=10:BatchSize=32:SaveBestOnly=false:Verbose=0:NumThreads=1:tf.keras"); - std::cout << "Train model..." << std::endl; - factory->TrainAllMethods(); - - // Clean-up - delete factory; - delete dataloader; - delete outputFile; - - // Setup reader - UInt_t numEvents = 100; - std::cout << "Run reader and classify " << numEvents << " events..." 
<< std::endl; - TMVA::Reader *reader = new TMVA::Reader("!Color:Silent"); - Float_t vars[4]; - reader->AddVariable("var1", vars+0); - reader->AddVariable("var2", vars+1); - reader->AddVariable("var3", vars+2); - reader->AddVariable("var4", vars+3); - std::cout << "Booking PyKeras for TMVA::Reader\n"; - reader->BookMVA("PyKeras", "datasetTestPyKerasClassification/weights/testPyKerasClassification_PyKeras.weights.xml"); - - // Get mean response of method on signal and background events - signal->SetBranchAddress("var1", vars+0); - signal->SetBranchAddress("var2", vars+1); - signal->SetBranchAddress("var3", vars+2); - signal->SetBranchAddress("var4", vars+3); - - background->SetBranchAddress("var1", vars+0); - background->SetBranchAddress("var2", vars+1); - background->SetBranchAddress("var3", vars+2); - background->SetBranchAddress("var4", vars+3); - - Float_t meanMvaSignal = 0; - Float_t meanMvaBackground = 0; - std::cout << "Reading events....\n"; - for(UInt_t i=0; iGetEntry(i); - meanMvaSignal += reader->EvaluateMVA("PyKeras"); - background->GetEntry(i); - meanMvaBackground += reader->EvaluateMVA("PyKeras"); - } - meanMvaSignal = meanMvaSignal/float(numEvents); - meanMvaBackground = meanMvaBackground/float(numEvents); - - // Check whether the response is obviously better than guessing - std::cout << "Mean MVA response on signal: " << meanMvaSignal << std::endl; - if(meanMvaSignal < 0.6){ - std::cout << "[ERROR] Mean response on signal is " << meanMvaSignal << " (<0.6)" << std::endl; - return 1; - } - std::cout << "Mean MVA response on background: " << meanMvaBackground << std::endl; - if(meanMvaBackground > 0.4){ - std::cout << "[ERROR] Mean response on background is " << meanMvaBackground << " (>0.4)" << std::endl; - return 1; - } - - return 0; -} - -int main(){ - int err = testPyKerasClassification(); - return err; -} diff --git a/tmva/pymva/test/testPyKerasMulticlass.C b/tmva/pymva/test/testPyKerasMulticlass.C deleted file mode 100644 index feb6525399a16..0000000000000 --- a/tmva/pymva/test/testPyKerasMulticlass.C +++ /dev/null @@ -1,168 +0,0 @@ -#include - -#include "TString.h" -#include "TFile.h" -#include "TTree.h" -#include "TSystem.h" -#include "TROOT.h" -#include "TMVA/Factory.h" -#include "TMVA/Reader.h" -#include "TMVA/DataLoader.h" -#include "TMVA/PyMethodBase.h" - -TString pythonSrc = "\ -from tensorflow.keras.models import Sequential\n\ -from tensorflow.keras.layers import Dense, Activation\n\ -from tensorflow.keras.optimizers import Adam\n\ -\n\ -model = Sequential()\n\ -model.add(Dense(64, activation=\"relu\", input_dim=4))\n\ -model.add(Dense(4, activation=\"softmax\"))\n\ -model.compile(loss=\"categorical_crossentropy\", optimizer=Adam(), weighted_metrics=[\"accuracy\",])\n\ -model.save(\"kerasModelMulticlass.h5\")\n"; - -int testPyKerasMulticlass(){ - // Get data file - std::cout << "Get test data..." << std::endl; - TString fname = "./tmva_example_multiple_background.root"; - if (gSystem->AccessPathName(fname)){ // file does not exist in local directory - std::cout << "Create multiclass test data..." << std::endl; - TString createDataMacro = TString(gROOT->GetTutorialsDir()) + "/machine_learning/createData.C"; - gROOT->ProcessLine(TString::Format(".L %s",createDataMacro.Data())); - gROOT->ProcessLine("create_MultipleBackground(200)"); - std::cout << "Created " << fname << " for tests of the multiclass features" << std::endl; - } - TFile *input = TFile::Open(fname); - - // Build model from python file - std::cout << "Generate keras model..." 
<< std::endl; - UInt_t ret; - ret = gSystem->Exec("echo '"+pythonSrc+"' > generateKerasModelMulticlass.py"); - if(ret!=0){ - std::cout << "[ERROR] Failed to write python code to file" << std::endl; - return 1; - } - ret = gSystem->Exec(TMVA::Python_Executable() + " generateKerasModelMulticlass.py"); - if(ret!=0){ - std::cout << "[ERROR] Failed to generate model using python" << std::endl; - return 1; - } - - // Setup PyMVA and factory - std::cout << "Setup TMVA..." << std::endl; - TMVA::PyMethodBase::PyInitialize(); - TFile* outputFile = TFile::Open("ResultsTestPyKerasMulticlass.root", "RECREATE"); - TMVA::Factory *factory = new TMVA::Factory("testPyKerasMulticlass", outputFile, - "!V:Silent:Color:!DrawProgressBar:AnalysisType=multiclass"); - - // Load data - TMVA::DataLoader *dataloader = new TMVA::DataLoader("datasetTestPyKerasMulticlass"); - - TTree *signal = (TTree*)input->Get("TreeS"); - TTree *background0 = (TTree*)input->Get("TreeB0"); - TTree *background1 = (TTree*)input->Get("TreeB1"); - TTree *background2 = (TTree*)input->Get("TreeB2"); - dataloader->AddTree(signal, "Signal"); - dataloader->AddTree(background0, "Background_0"); - dataloader->AddTree(background1, "Background_1"); - dataloader->AddTree(background2, "Background_2"); - - dataloader->AddVariable("var1"); - dataloader->AddVariable("var2"); - dataloader->AddVariable("var3"); - dataloader->AddVariable("var4"); - - dataloader->PrepareTrainingAndTestTree("", - "SplitMode=Random:NormMode=NumEvents:!V"); - - // Book and train method - factory->BookMethod(dataloader, TMVA::Types::kPyKeras, "PyKeras", - "!H:!V:VarTransform=D,G:FilenameModel=kerasModelMulticlass.h5:FilenameTrainedModel=trainedKerasModelMulticlass.h5:NumEpochs=20:BatchSize=32:SaveBestOnly=false:Verbose=0"); - std::cout << "Train model..." << std::endl; - factory->TrainAllMethods(); - - // Clean-up - delete factory; - delete dataloader; - delete outputFile; - - // Setup reader - UInt_t numEvents = 100; - std::cout << "Run reader and classify " << numEvents << " events..." 
<< std::endl; - TMVA::Reader *reader = new TMVA::Reader("!Color:Silent"); - Float_t vars[4]; - reader->AddVariable("var1", vars+0); - reader->AddVariable("var2", vars+1); - reader->AddVariable("var3", vars+2); - reader->AddVariable("var4", vars+3); - reader->BookMVA("PyKeras", "datasetTestPyKerasMulticlass/weights/testPyKerasMulticlass_PyKeras.weights.xml"); - - // Get mean response of method on signal and background events - signal->SetBranchAddress("var1", vars+0); - signal->SetBranchAddress("var2", vars+1); - signal->SetBranchAddress("var3", vars+2); - signal->SetBranchAddress("var4", vars+3); - - background0->SetBranchAddress("var1", vars+0); - background0->SetBranchAddress("var2", vars+1); - background0->SetBranchAddress("var3", vars+2); - background0->SetBranchAddress("var4", vars+3); - - background1->SetBranchAddress("var1", vars+0); - background1->SetBranchAddress("var2", vars+1); - background1->SetBranchAddress("var3", vars+2); - background1->SetBranchAddress("var4", vars+3); - - background2->SetBranchAddress("var1", vars+0); - background2->SetBranchAddress("var2", vars+1); - background2->SetBranchAddress("var3", vars+2); - background2->SetBranchAddress("var4", vars+3); - - Float_t meanMvaSignal = 0; - Float_t meanMvaBackground0 = 0; - Float_t meanMvaBackground1 = 0; - Float_t meanMvaBackground2 = 0; - for(UInt_t i=0; iGetEntry(i); - meanMvaSignal += reader->EvaluateMulticlass("PyKeras")[0]; - background0->GetEntry(i); - meanMvaBackground0 += reader->EvaluateMulticlass("PyKeras")[1]; - background1->GetEntry(i); - meanMvaBackground1 += reader->EvaluateMulticlass("PyKeras")[2]; - background2->GetEntry(i); - meanMvaBackground2 += reader->EvaluateMulticlass("PyKeras")[3]; - } - meanMvaSignal = meanMvaSignal/float(numEvents); - meanMvaBackground0 = meanMvaBackground0/float(numEvents); - meanMvaBackground1 = meanMvaBackground1/float(numEvents); - meanMvaBackground2 = meanMvaBackground2/float(numEvents); - - // Check whether the response is obviously better than guessing - std::cout << "Mean MVA response on signal: " << meanMvaSignal << std::endl; - if(meanMvaSignal < 0.3){ - std::cout << "[ERROR] Mean response on signal is " << meanMvaSignal << " (<0.3)" << std::endl; - return 1; - } - std::cout << "Mean MVA response on background 0: " << meanMvaBackground0 << std::endl; - if(meanMvaBackground0 < 0.3){ - std::cout << "[ERROR] Mean response on background 0 is " << meanMvaBackground0 << " (<0.3)" << std::endl; - return 1; - } - std::cout << "Mean MVA response on background 1: " << meanMvaBackground1 << std::endl; - if(meanMvaBackground0 < 0.3){ - std::cout << "[ERROR] Mean response on background 1 is " << meanMvaBackground1 << " (<0.3)" << std::endl; - return 1; - } - std::cout << "Mean MVA response on background 2: " << meanMvaBackground2 << std::endl; - if(meanMvaBackground0 < 0.3){ - std::cout << "[ERROR] Mean response on background 2 is " << meanMvaBackground2 << " (<0.3)" << std::endl; - return 1; - } - - return 0; -} - -int main(){ - int err = testPyKerasMulticlass(); - return err; -} diff --git a/tmva/pymva/test/testPyKerasRegression.C b/tmva/pymva/test/testPyKerasRegression.C deleted file mode 100644 index 2b549f33c85af..0000000000000 --- a/tmva/pymva/test/testPyKerasRegression.C +++ /dev/null @@ -1,123 +0,0 @@ -#include "TString.h" -#include "TFile.h" -#include "TTree.h" -#include "TSystem.h" -#include "TROOT.h" -#include "TMVA/Factory.h" -#include "TMVA/Reader.h" -#include "TMVA/DataLoader.h" -#include "TMVA/PyMethodBase.h" - -#include -#include - -TString pythonSrc = "\ -from 
tensorflow.keras.models import Sequential\n\ -from tensorflow.keras.layers import Dense, Activation\n\ -from tensorflow.keras.optimizers import SGD\n\ -\n\ -model = Sequential()\n\ -model.add(Dense(64, activation=\"tanh\", input_dim=2))\n\ -model.add(Dense(1, activation=\"linear\"))\n\ -model.compile(loss=\"mean_squared_error\", optimizer=SGD(learning_rate=0.01), weighted_metrics=[])\n\ -model.save(\"kerasModelRegression.h5\")\n"; - -int testPyKerasRegression(){ - // Get data file - std::cout << "Get test data..." << std::endl; - TString fname = gROOT->GetTutorialDir() + "/machine_learning/data/tmva_reg_example.root"; - TFile *input = TFile::Open(fname); - if (!input) { - std::cout << "ERROR: could not open data file " << fname << std::endl; - return 1; - } - - // Build model from python file - std::cout << "Generate keras model..." << std::endl; - UInt_t ret; - ret = gSystem->Exec("echo '"+pythonSrc+"' > generateKerasModelRegression.py"); - if(ret!=0){ - std::cout << "[ERROR] Failed to write python code to file" << std::endl; - return 1; - } - ret = gSystem->Exec(TMVA::Python_Executable() + " generateKerasModelRegression.py"); - if(ret!=0){ - std::cout << "[ERROR] Failed to generate model using python" << std::endl; - return 1; - } - - // Setup PyMVA and factory - std::cout << "Setup TMVA..." << std::endl; - TMVA::PyMethodBase::PyInitialize(); - TFile* outputFile = TFile::Open("ResultsTestPyKerasRegression.root", "RECREATE"); - TMVA::Factory *factory = new TMVA::Factory("testPyKerasRegression", outputFile, - "!V:Silent:Color:!DrawProgressBar:AnalysisType=Regression"); - - // Load data - TMVA::DataLoader *dataloader = new TMVA::DataLoader("datasetTestPyKerasRegression"); - - TTree *tree = (TTree*)input->Get("TreeR"); - dataloader->AddRegressionTree(tree); - - dataloader->AddVariable("var1"); - dataloader->AddVariable("var2"); - dataloader->AddTarget("fvalue"); - - dataloader->PrepareTrainingAndTestTree("", -#ifdef R__MACOSX // on macos we don;t disable eager execution, it is very slow - "nTrain_Regression=500:nTest_Regression=100:SplitMode=Random:NormMode=NumEvents:!V"); -#else - "nTrain_Regression=1000:nTest_Regression=200:SplitMode=Random:NormMode=NumEvents:!V"); -#endif - // Book and train method - factory->BookMethod(dataloader, TMVA::Types::kPyKeras, "PyKeras", - "!H:!V:VarTransform=D,G:FilenameModel=kerasModelRegression.h5:FilenameTrainedModel=trainedKerasModelRegression.h5:NumEpochs=10:BatchSize=25:SaveBestOnly=false:Verbose=0"); - std::cout << "Train model..." << std::endl; - factory->TrainAllMethods(); - - // Clean-up - delete factory; - delete dataloader; - delete outputFile; - - // Setup reader - UInt_t numEvents = 100; - std::cout << "Run reader and estimate target of " << numEvents << " events..." 
<< std::endl; - TMVA::Reader *reader = new TMVA::Reader("!Color:Silent"); - Float_t vars[3]; - reader->AddVariable("var1", vars+0); - reader->AddVariable("var2", vars+1); - reader->BookMVA("PyKeras", "datasetTestPyKerasRegression/weights/testPyKerasRegression_PyKeras.weights.xml"); - - // Get mean squared error on events - tree->SetBranchAddress("var1", vars+0); - tree->SetBranchAddress("var2", vars+1); - tree->SetBranchAddress("fvalue", vars+2); - - Float_t meanMvaError = 0; - for(UInt_t i=0; i<numEvents; i++) { - tree->GetEntry(i); - meanMvaError += std::pow(vars[2]-reader->EvaluateMVA("PyKeras"),2); - } - meanMvaError = meanMvaError/float(numEvents); - - // Check whether the response is obviously better than guessing - std::cout << "Mean squared error: " << meanMvaError << std::endl; -/* -#ifdef R__MACOSX - if(meanMvaError > 30.0){ -#else - if(meanMvaError > 60.0){ -#endif - std::cout << "[ERROR] Mean squared error is " << meanMvaError << " (>30.0)" << std::endl; - return 1; - } -*/ - - return 0; -} - -int main(){ - int err = testPyKerasRegression(); - return err; -} diff --git a/tmva/tmva/inc/TMVA/Types.h b/tmva/tmva/inc/TMVA/Types.h index 6f019e808a6cd..2c17538ce3e5d 100644 --- a/tmva/tmva/inc/TMVA/Types.h +++ b/tmva/tmva/inc/TMVA/Types.h @@ -100,7 +100,6 @@ namespace TMVA { kPyRandomForest , kPyAdaBoost , kPyGTB , - kPyKeras , kPyTorch , kC50 , kRSNNS , diff --git a/tutorials/CMakeLists.txt b/tutorials/CMakeLists.txt index 9d0b23f214f7c..48ede42279390 100644 --- a/tutorials/CMakeLists.txt +++ b/tutorials/CMakeLists.txt @@ -327,7 +327,7 @@ if(NOT TARGET Gui) endif() if (NOT ROOT_tmva_FOUND) - list(APPEND tmva_veto machine_learning/*.C machine_learning/*.py machine_learning/envelope/*.C machine_learning/keras/*.C machine_learning/keras/*.py machine_learning/pytorch/*.py ) + list(APPEND tmva_veto machine_learning/*.C machine_learning/*.py machine_learning/envelope/*.C machine_learning/pytorch/*.py ) else() #copy input data files configure_file(${CMAKE_CURRENT_SOURCE_DIR}/machine_learning/data/tmva_class_example.root ${CMAKE_CURRENT_BINARY_DIR}/machine_learning/data COPYONLY) @@ -370,8 +370,11 @@ else() list(APPEND tmva_veto machine_learning/TMVA_SOFIE_RDataFrame_JIT.C) list(APPEND tmva_veto machine_learning/TMVA_SOFIE_RSofieReader.C) endif() - if (NOT tmva-pymva) - # These SOFIE tutorials take models trained via PyMVA-PyKeras as input + if (TRUE) + # TODO: these SOFIE tutorials use models that were created with the old TMVA + # PyKeras tutorials, which are now removed. The required models need to be + # created by other means, preferably by tutorials that show Keras model + # training with the RBatchGenerator.
list(APPEND tmva_veto machine_learning/TMVA_SOFIE_Keras_HiggsModel.C) list(APPEND tmva_veto machine_learning/TMVA_SOFIE_RDataFrame.C) list(APPEND tmva_veto machine_learning/TMVA_SOFIE_RDataFrame.py) @@ -644,10 +647,6 @@ set (machine_learning-TMVA_SOFIE_RDataFrame_JIT-depends tutorial-machine_learnin set (machine_learning-TMVA_SOFIE_Keras_HiggsModel-depends tutorial-machine_learning-TMVA_SOFIE_RDataFrame_JIT) set (machine_learning-TMVA_SOFIE_RDataFrame-depends tutorial-machine_learning-TMVA_SOFIE_Keras_HiggsModel) set (machine_learning-TMVA_SOFIE_Inference-depends tutorial-machine_learning-TMVA_SOFIE_RDataFrame) -set (machine_learning-keras-RegressionKeras-depends tutorial-machine_learning-pytorch-RegressionPyTorch-py) -set (machine_learning-keras-ClassificationKeras-depends tutorial-machine_learning-pytorch-ClassificationPyTorch-py) -set (machine_learning-keras-ApplicationRegressionKeras-depends tutorial-machine_learning-keras-RegressionKeras-py) -set (machine_learning-keras-ApplicationClassificationKeras-depends tutorial-machine_learning-keras-ClassificationKeras-py) #--List long-running tutorials to label them as "longtest" set (long_running @@ -840,11 +839,6 @@ if(ROOT_pyroot_FOUND) # Disable tutorial showing connection to the HTCondor service at CERN list(APPEND pyveto analysis/dataframe/distrdf004_dask_lxbatch.py) - if(NOT tmva-pymva) - file(GLOB tmva_veto_py RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} machine_learning/keras/*.py) - list(APPEND pyveto ${tmva_veto_py}) - endif() - if (ROOT_KERAS_FOUND) set (machine_learning-TMVA_SOFIE_RDataFrame-py-depends tutorial-machine_learning-TMVA_SOFIE_Keras_HiggsModel) endif() @@ -943,7 +937,6 @@ if(ROOT_pyroot_FOUND) machine_learning/TMVA_SOFIE_Inference.py machine_learning/TMVA_SOFIE_Models.py machine_learning/TMVA_SOFIE_RDataFrame.py - machine_learning/keras/*.py ) file(GLOB requires_torch RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} machine_learning/pytorch/*.py diff --git a/tutorials/machine_learning/TMVA_CNN_Classification.C b/tutorials/machine_learning/TMVA_CNN_Classification.C index a7c8915dbc69e..fc885352eead5 100644 --- a/tutorials/machine_learning/TMVA_CNN_Classification.C +++ b/tutorials/machine_learning/TMVA_CNN_Classification.C @@ -113,7 +113,6 @@ void MakeImagesTree(int n, int nh, int nw) /// but increase to at least 5000 to get a good result /// @param opt : vector of bool with method used (default all on if available). The order is: /// - TMVA CNN -/// - Keras CNN /// - TMVA DNN /// - TMVA BDT /// - PyTorch CNN @@ -131,7 +130,6 @@ void TMVA_CNN_Classification(int nevts = 1000, std::vector opt = {1, 1, 1, } bool useTMVACNN = (opt.size() > 0) ? opt[0] : false; - bool useKerasCNN = (opt.size() > 1) ? opt[1] : false; bool useTMVADNN = (opt.size() > 2) ? opt[2] : false; bool useTMVABDT = (opt.size() > 3) ? opt[3] : false; bool usePyTorchCNN = (opt.size() > 4) ? 
opt[4] : false; @@ -163,10 +161,8 @@ void TMVA_CNN_Classification(int nevts = 1000, std::vector opt = {1, 1, 1, #ifdef R__HAS_PYMVA gSystem->Setenv("KERAS_BACKEND", "tensorflow"); - // for using Keras TMVA::PyMethodBase::PyInitialize(); #else - useKerasCNN = false; usePyTorchCNN = false; #endif @@ -428,61 +424,11 @@ void TMVA_CNN_Classification(int nevts = 1000, std::vector opt = {1, 1, 1, factory.BookMethod(&loader, TMVA::Types::kDL, cnnMethodName, cnnOptions); } - /** - ### Book Convolutional Neural Network in Keras using a generated model - - **/ - #ifdef R__HAS_PYMVA // The next section uses Python packages, execute it only if PyMVA is available TString tmva_python_exe{TMVA::Python_Executable()}; TString python_exe = tmva_python_exe.IsNull() ? "python" : tmva_python_exe; - if (useKerasCNN) { - - Info("TMVA_CNN_Classification", "Building convolutional keras model"); - // create python script which can be executed - // create 2 conv2d layer + maxpool + dense - TMacro m; - m.AddLine("import tensorflow"); - m.AddLine("from tensorflow.keras.models import Sequential"); - m.AddLine("from tensorflow.keras.optimizers import Adam"); - m.AddLine( - "from tensorflow.keras.layers import Input, Dense, Dropout, Flatten, Conv2D, MaxPooling2D, Reshape, BatchNormalization"); - m.AddLine(""); - m.AddLine("model = Sequential() "); - m.AddLine("model.add(Reshape((16, 16, 1), input_shape = (256, )))"); - m.AddLine("model.add(Conv2D(10, kernel_size = (3, 3), kernel_initializer = 'glorot_normal',activation = " - "'relu', padding = 'same'))"); - m.AddLine("model.add(BatchNormalization())"); - m.AddLine("model.add(Conv2D(10, kernel_size = (3, 3), kernel_initializer = 'glorot_normal',activation = " - "'relu', padding = 'same'))"); - // m.AddLine("model.add(BatchNormalization())"); - m.AddLine("model.add(MaxPooling2D(pool_size = (2, 2), strides = (1,1))) "); - m.AddLine("model.add(Flatten())"); - m.AddLine("model.add(Dense(256, activation = 'relu')) "); - m.AddLine("model.add(Dense(2, activation = 'sigmoid')) "); - m.AddLine("model.compile(loss = 'binary_crossentropy', optimizer = Adam(learning_rate = 0.001), weighted_metrics = ['accuracy'])"); - m.AddLine("model.save('model_cnn.h5')"); - m.AddLine("model.summary()"); - - m.SaveSource("make_cnn_model.py"); - // execute - gSystem->Exec(python_exe + " make_cnn_model.py"); - - if (gSystem->AccessPathName("model_cnn.h5")) { - Warning("TMVA_CNN_Classification", "Error creating Keras model file - skip using Keras"); - } else { - // book PyKeras method only if Keras model could be created - Info("TMVA_CNN_Classification", "Booking tf.Keras CNN model"); - factory.BookMethod( - &loader, TMVA::Types::kPyKeras, "PyKeras", - "H:!V:VarTransform=None:FilenameModel=model_cnn.h5:tf.keras:" - "FilenameTrainedModel=trained_model_cnn.h5:NumEpochs=10:BatchSize=100:" - "GpuOptions=allow_growth=True"); // needed for RTX NVidia card and to avoid TF allocates all GPU memory - } - } - if (usePyTorchCNN) { Info("TMVA_CNN_Classification", "Using Convolutional PyTorch Model"); diff --git a/tutorials/machine_learning/TMVA_CNN_Classification.py b/tutorials/machine_learning/TMVA_CNN_Classification.py index 501d8e4e36bd8..6fc7b431fc0af 100644 --- a/tutorials/machine_learning/TMVA_CNN_Classification.py +++ b/tutorials/machine_learning/TMVA_CNN_Classification.py @@ -29,7 +29,6 @@ opt = [1, 1, 1, 1, 1] useTMVACNN = opt[0] if len(opt) > 0 else False -useKerasCNN = opt[1] if len(opt) > 1 else False useTMVADNN = opt[2] if len(opt) > 2 else False useTMVABDT = opt[3] if len(opt) > 3 else False 
usePyTorchCNN = opt[4] if len(opt) > 4 else False @@ -118,7 +117,6 @@ def MakeImagesTree(n, nh, nw): useTMVADNN = False if not "tmva-pymva" in ROOT.gROOT.GetConfigFeatures(): - useKerasCNN = False usePyTorchCNN = False else: TMVA.PyMethodBase.PyInitialize() @@ -408,8 +406,6 @@ def MakeImagesTree(n, nh, nw): ) -### Book Convolutional Neural Network in Keras using a generated model - if usePyTorchCNN: ROOT.Info("TMVA_CNN_Classification", "Using Convolutional PyTorch Model") @@ -441,53 +437,6 @@ def MakeImagesTree(n, nh, nw): "PyTorch is not installed or model building file is not existing - skip using PyTorch", ) -if useKerasCNN: - ROOT.Info("TMVA_CNN_Classification", "Building convolutional keras model") - # create python script which can be executed - # create 2 conv2d layer + maxpool + dense - import tensorflow - from tensorflow.keras.models import Sequential - from tensorflow.keras.optimizers import Adam - - # from keras.initializers import TruncatedNormal - # from keras import initializations - from tensorflow.keras.layers import Input, Dense, Dropout, Flatten, Conv2D, MaxPooling2D, Reshape - - # from keras.callbacks import ReduceLROnPlateau - model = Sequential() - model.add(Reshape((16, 16, 1), input_shape=(256,))) - model.add(Conv2D(10, kernel_size=(3, 3), kernel_initializer="TruncatedNormal", activation="relu", padding="same")) - model.add(Conv2D(10, kernel_size=(3, 3), kernel_initializer="TruncatedNormal", activation="relu", padding="same")) - # stride for maxpool is equal to pool size - model.add(MaxPooling2D(pool_size=(2, 2))) - model.add(Flatten()) - model.add(Dense(64, activation="tanh")) - # model.add(Dropout(0.2)) - model.add(Dense(2, activation="sigmoid")) - model.compile(loss="binary_crossentropy", optimizer=Adam(learning_rate=0.001), weighted_metrics=["accuracy"]) - model.save("model_cnn.h5") - model.summary() - - if not os.path.exists("model_cnn.h5"): - raise FileNotFoundError("Error creating Keras model file - skip using Keras") - else: - # book PyKeras method only if Keras model could be created - ROOT.Info("TMVA_CNN_Classification", "Booking convolutional keras model") - factory.BookMethod( - loader, - TMVA.Types.kPyKeras, - "PyKeras", - H=True, - V=False, - VarTransform=None, - FilenameModel="model_cnn.h5", - FilenameTrainedModel="trained_model_cnn.h5", - NumEpochs=max_epochs, - BatchSize=100, - GpuOptions="allow_growth=True", - ) # needed for RTX NVidia card and to avoid TF allocates all GPU memory - - ## Train Methods diff --git a/tutorials/machine_learning/TMVA_Higgs_Classification.C b/tutorials/machine_learning/TMVA_Higgs_Classification.C index 7cf1f6ac00690..c464f1acadd15 100644 --- a/tutorials/machine_learning/TMVA_Higgs_Classification.C +++ b/tutorials/machine_learning/TMVA_Higgs_Classification.C @@ -41,17 +41,14 @@ void TMVA_Higgs_Classification() { bool useMLP = false; // Multi Layer Perceptron (old TMVA NN implementation) bool useBDT = true; // Boosted Decision Tree bool useDL = true; // TMVA Deep learning ( CPU or GPU) - bool useKeras = true; // Keras Deep learning bool usePyTorch = true; // PyTorch Deep learning TMVA::Tools::Instance(); #ifdef R__HAS_PYMVA gSystem->Setenv("KERAS_BACKEND", "tensorflow"); - // for using Keras TMVA::PyMethodBase::PyInitialize(); #else - useKeras = false; usePyTorch = false; #endif @@ -287,47 +284,6 @@ We can then book the DL method using the built option string factory.BookMethod(loader, TMVA::Types::kDL, dnnMethodName, dnnOptions); } - // Keras deep learning - if (useKeras) { - - Info("TMVA_Higgs_Classification", "Building 
deep neural network with keras "); - // create python script which can be executed - // create 2 conv2d layer + maxpool + dense - TMacro m; - m.AddLine("import tensorflow"); - m.AddLine("from tensorflow.keras.models import Sequential"); - m.AddLine("from tensorflow.keras.optimizers import Adam"); - m.AddLine("from tensorflow.keras.layers import Input, Dense"); - m.AddLine(""); - m.AddLine("model = Sequential() "); - m.AddLine("model.add(Dense(64, activation='relu',input_dim=7))"); - m.AddLine("model.add(Dense(64, activation='relu'))"); - m.AddLine("model.add(Dense(64, activation='relu'))"); - m.AddLine("model.add(Dense(64, activation='relu'))"); - m.AddLine("model.add(Dense(2, activation='sigmoid'))"); - m.AddLine("model.compile(loss = 'binary_crossentropy', optimizer = Adam(learning_rate = 0.001), weighted_metrics = ['accuracy'])"); - m.AddLine("model.save('Higgs_model.h5')"); - m.AddLine("model.summary()"); - - m.SaveSource("make_higgs_model.py"); - // execute - auto ret = (TString *)gROOT->ProcessLine("TMVA::Python_Executable()"); - TString python_exe = (ret) ? *(ret) : "python"; - gSystem->Exec(python_exe + " make_higgs_model.py"); - - if (gSystem->AccessPathName("Higgs_model.h5")) { - Warning("TMVA_Higgs_Classification", "Error creating Keras model file - skip using Keras"); - } else { - // book PyKeras method only if Keras model could be created - Info("TMVA_Higgs_Classification", "Booking tf.Keras Dense model"); - factory.BookMethod( - loader, TMVA::Types::kPyKeras, "PyKeras", - "H:!V:VarTransform=None:FilenameModel=Higgs_model.h5:tf.keras:" - "FilenameTrainedModel=Higgs_trained_model.h5:NumEpochs=20:BatchSize=100:" - "GpuOptions=allow_growth=True"); // needed for RTX NVidia card and to avoid TF allocates all GPU memory - } - } - /** ## Train Methods diff --git a/tutorials/machine_learning/TMVA_Higgs_Classification.py b/tutorials/machine_learning/TMVA_Higgs_Classification.py index 4ba730b248477..949c18b910612 100644 --- a/tutorials/machine_learning/TMVA_Higgs_Classification.py +++ b/tutorials/machine_learning/TMVA_Higgs_Classification.py @@ -44,19 +44,9 @@ useMLP = False # Multi Layer Perceptron (old TMVA NN implementation) useBDT = True # Boosted Decision Tree useDL = True # TMVA Deep learning ( CPU or GPU) -useKeras = True # Use Keras Deep Learning via PyMVA if ROOT.gSystem.GetFromPipe("root-config --has-tmva-pymva") == "yes": TMVA.PyMethodBase.PyInitialize() -else: - useKeras = False # cannot use Keras if PYMVA is not available - -if useKeras: - try: - import tensorflow - except: - ROOT.Warning("TMVA_Higgs_Classification", "Skip using Keras since tensorflow is not available") - useKeras = False outputFile = TFile.Open("Higgs_ClassificationOutput.root", "RECREATE") factory = TMVA.Factory( @@ -311,44 +301,6 @@ Architecture=arch, ) -# Keras DL -if useKeras: - ROOT.Info("TMVA_Higgs_Classification", "Building Deep Learning keras model") - # create Keras model with 4 layers of 64 units and relu activations - import tensorflow - from tensorflow.keras.models import Sequential - from tensorflow.keras.optimizers import Adam - from tensorflow.keras.layers import Input, Dense - - model = Sequential() - model.add(Dense(64, activation="relu", input_dim=7)) - model.add(Dense(64, activation="relu")) - model.add(Dense(64, activation="relu")) - model.add(Dense(64, activation="relu")) - model.add(Dense(2, activation="sigmoid")) - model.compile(loss="binary_crossentropy", optimizer=Adam(learning_rate=0.001), weighted_metrics=["accuracy"]) - model.save("model_higgs.h5") - model.summary() - - if not 
os.path.exists("model_higgs.h5"): - raise FileNotFoundError("Error creating Keras model file - skip using Keras") - else: - # book PyKeras method only if Keras model could be created - ROOT.Info("TMVA_Higgs_Classification", "Booking Deep Learning keras model") - factory.BookMethod( - loader, - TMVA.Types.kPyKeras, - "PyKeras", - H=True, - V=False, - VarTransform=None, - FilenameModel="model_higgs.h5", - FilenameTrainedModel="trained_model_higgs.h5", - NumEpochs=20, - BatchSize=100, - ) -# GpuOptions="allow_growth=True", -# ) # needed for RTX NVidia card and to avoid TF allocates all GPU memory ## Train Methods diff --git a/tutorials/machine_learning/TMVA_RNN_Classification.C b/tutorials/machine_learning/TMVA_RNN_Classification.C index a7cbccdcfd615..302087ae70340 100644 --- a/tutorials/machine_learning/TMVA_RNN_Classification.C +++ b/tutorials/machine_learning/TMVA_RNN_Classification.C @@ -151,12 +151,9 @@ void TMVA_RNN_Classification(int nevts = 2000, int use_type = 1) int nTotEvts = nevts; // total events to be generated for signal or background - bool useKeras = true; - - bool useTMVA_RNN = true; bool useTMVA_DNN = true; - bool useTMVA_BDT = false; + bool useTMVA_BDT = true; std::vector rnn_types = {"RNN", "LSTM", "GRU"}; std::vector use_rnn_type = {1, 1, 1}; @@ -185,8 +182,6 @@ void TMVA_RNN_Classification(int nevts = 2000, int use_type = 1) #ifdef R__HAS_PYMVA TMVA::PyMethodBase::PyInitialize(); -#else - useKeras = false; #endif #ifdef R__USE_IMT @@ -384,79 +379,6 @@ the option string factory->BookMethod(dataloader, TMVA::Types::kDL, dnnName, dnnOptions); } - /** - ## Book Keras recurrent models - - Book the different types of recurrent models in Keras (SimpleRNN, LSTM or GRU) - - **/ - - if (useKeras) { - - for (int i = 0; i < 3; i++) { - - if (use_rnn_type[i]) { - - TString modelName = TString::Format("model_%s.h5", rnn_types[i].c_str()); - TString trainedModelName = TString::Format("trained_model_%s.h5", rnn_types[i].c_str()); - - Info("TMVA_RNN_Classification", "Building recurrent keras model using a %s layer", rnn_types[i].c_str()); - // create python script which can be executed - // create 2 conv2d layer + maxpool + dense - TMacro m; - m.AddLine("import tensorflow"); - m.AddLine("from tensorflow.keras.models import Sequential"); - m.AddLine("from tensorflow.keras.optimizers import Adam"); - m.AddLine("from tensorflow.keras.layers import Input, Dense, Dropout, Flatten, SimpleRNN, GRU, LSTM, Reshape, " - "BatchNormalization"); - m.AddLine(""); - m.AddLine("model = Sequential() "); - m.AddLine("model.add(Reshape((10, 30), input_shape = (10*30, )))"); - // add recurrent neural network depending on type / Use option to return the full output - if (rnn_types[i] == "LSTM") - m.AddLine("model.add(LSTM(units=10, return_sequences=True) )"); - else if (rnn_types[i] == "GRU") - m.AddLine("model.add(GRU(units=10, return_sequences=True) )"); - else - m.AddLine("model.add(SimpleRNN(units=10, return_sequences=True) )"); - - // m.AddLine("model.add(BatchNormalization())"); - m.AddLine("model.add(Flatten())"); // needed if returning the full time output sequence - m.AddLine("model.add(Dense(64, activation = 'tanh')) "); - m.AddLine("model.add(Dense(2, activation = 'sigmoid')) "); - m.AddLine( - "model.compile(loss = 'binary_crossentropy', optimizer = Adam(learning_rate = 0.001), weighted_metrics = ['accuracy'])"); - m.AddLine(TString::Format("modelName = '%s'", modelName.Data())); - m.AddLine("model.save(modelName)"); - m.AddLine("model.summary()"); - - m.SaveSource("make_rnn_model.py"); - // 
execute python script to make the model - auto ret = (TString *)gROOT->ProcessLine("TMVA::Python_Executable()"); - TString python_exe = (ret) ? *(ret) : "python"; - gSystem->Exec(python_exe + " make_rnn_model.py"); - - if (gSystem->AccessPathName(modelName)) { - Warning("TMVA_RNN_Classification", "Error creating Keras recurrent model file - Skip using Keras"); - useKeras = false; - } else { - // book PyKeras method only if Keras model could be created - Info("TMVA_RNN_Classification", "Booking Keras %s model", rnn_types[i].c_str()); - factory->BookMethod(dataloader, TMVA::Types::kPyKeras, - TString::Format("PyKeras_%s", rnn_types[i].c_str()), - TString::Format("!H:!V:VarTransform=None:FilenameModel=%s:tf.keras:" - "FilenameTrainedModel=%s:GpuOptions=allow_growth=True:" - "NumEpochs=%d:BatchSize=%d", - modelName.Data(), trainedModelName.Data(), maxepochs, batchSize)); - } - } - } - } - - // use BDT in case not using Keras or TMVA DL - if (!useKeras || !useTMVA_BDT) - useTMVA_BDT = true; - /** ## Book TMVA BDT **/ diff --git a/tutorials/machine_learning/TMVA_RNN_Classification.py b/tutorials/machine_learning/TMVA_RNN_Classification.py index 9ddec7a30cde5..b77f165ac87c3 100644 --- a/tutorials/machine_learning/TMVA_RNN_Classification.py +++ b/tutorials/machine_learning/TMVA_RNN_Classification.py @@ -152,22 +152,9 @@ def MakeTimeData(n, ntime, ndim): nTotEvts = 2000 # total events to be generated for signal or background -useKeras = False - useTMVA_RNN = True useTMVA_DNN = True -useTMVA_BDT = False - -if ROOT.gSystem.GetFromPipe("root-config --has-tmva-pymva") == "yes": - useKeras = True - -if useKeras: - try: - import tensorflow - except: - ROOT.Warning("TMVA_RNN_Classification", "Skip using Keras since tensorflow cannot be imported") - useKeras = False - +useTMVA_BDT = True rnn_types = ["RNN", "LSTM", "GRU"] use_rnn_type = [1, 1, 1] @@ -195,8 +182,6 @@ def MakeTimeData(n, ntime, ndim): if "tmva-pymva" in ROOT.gROOT.GetConfigFeatures(): TMVA.PyMethodBase.PyInitialize() -else: - useKeras = False @@ -379,69 +364,6 @@ def MakeTimeData(n, ntime, ndim): ) -## Book Keras recurrent models - -# Book the different types of recurrent models in Keras (SimpleRNN, LSTM or GRU) - - -if useKeras: - for i in range(3): - if use_rnn_type[i]: - modelName = "model_" + rnn_types[i] + ".h5" - trainedModelName = "trained_" + modelName - print("Building recurrent keras model using a", rnn_types[i], "layer") - # create python script which can be executed - # create 2 conv2d layer + maxpool + dense - from tensorflow.keras.models import Sequential - from tensorflow.keras.optimizers import Adam - - # from keras.initializers import TruncatedNormal - # from keras import initializations - from tensorflow.keras.layers import Input, Dense, Dropout, Flatten, SimpleRNN, GRU, LSTM, Reshape, BatchNormalization - - model = Sequential() - model.add(Reshape((10, 30), input_shape=(10 * 30,))) - # add recurrent neural network depending on type / Use option to return the full output - if rnn_types[i] == "LSTM": - model.add(LSTM(units=10, return_sequences=True)) - elif rnn_types[i] == "GRU": - model.add(GRU(units=10, return_sequences=True)) - else: - model.add(SimpleRNN(units=10, return_sequences=True)) - # m.AddLine("model.add(BatchNormalization())"); - model.add(Flatten()) # needed if returning the full time output sequence - model.add(Dense(64, activation="tanh")) - model.add(Dense(2, activation="sigmoid")) - model.compile(loss="binary_crossentropy", optimizer=Adam(learning_rate=0.001), weighted_metrics=["accuracy"]) - 
model.save(modelName) - model.summary() - print("saved recurrent model", modelName) - - if not os.path.exists(modelName): - useKeras = False - print("Error creating Keras recurrent model file - Skip using Keras") - else: - # book PyKeras method only if Keras model could be created - print("Booking Keras model ", rnn_types[i]) - factory.BookMethod( - dataloader, - TMVA.Types.kPyKeras, - "PyKeras_" + rnn_types[i], - H=True, - V=False, - VarTransform=None, - FilenameModel=modelName, - FilenameTrainedModel="trained_" + modelName, - NumEpochs=maxepochs, - BatchSize=batchSize, - GpuOptions="allow_growth=True", - ) - - -# use BDT in case not using Keras or TMVA DL -if not useKeras or not useTMVA_BDT: - useTMVA_BDT = True - ## Book TMVA BDT diff --git a/tutorials/machine_learning/keras/ApplicationClassificationKeras.py b/tutorials/machine_learning/keras/ApplicationClassificationKeras.py deleted file mode 100755 index 85c7882fb75e4..0000000000000 --- a/tutorials/machine_learning/keras/ApplicationClassificationKeras.py +++ /dev/null @@ -1,47 +0,0 @@ -## \file -## \ingroup tutorial_tmva_keras -## \notebook -nodraw -## This tutorial shows how to apply a trained model to new data. -## -## \macro_code -## -## \date 2017 -## \author TMVA Team - -from ROOT import TMVA, TFile, TString, gROOT -from array import array -from subprocess import call -from os.path import isfile - -# Setup TMVA -TMVA.Tools.Instance() -TMVA.PyMethodBase.PyInitialize() -reader = TMVA.Reader("Color:!Silent") - -# Load data -data = TFile.Open(str(gROOT.GetTutorialDir()) + "/machine_learning/data/tmva_class_example.root") -signal = data.Get('TreeS') -background = data.Get('TreeB') - -branches = {} -for branch in signal.GetListOfBranches(): - branchName = branch.GetName() - branches[branchName] = array('f', [-999]) - reader.AddVariable(branchName, branches[branchName]) - signal.SetBranchAddress(branchName, branches[branchName]) - background.SetBranchAddress(branchName, branches[branchName]) - -# Book methods -reader.BookMVA('PyKeras', TString('dataset/weights/TMVAClassification_PyKeras.weights.xml')) - -# Print some example classifications -print('Some signal example classifications:') -for i in range(20): - signal.GetEntry(i) - print(reader.EvaluateMVA('PyKeras')) -print('') - -print('Some background example classifications:') -for i in range(20): - background.GetEntry(i) - print(reader.EvaluateMVA('PyKeras')) diff --git a/tutorials/machine_learning/keras/ApplicationRegressionKeras.py b/tutorials/machine_learning/keras/ApplicationRegressionKeras.py deleted file mode 100755 index 7055236e23047..0000000000000 --- a/tutorials/machine_learning/keras/ApplicationRegressionKeras.py +++ /dev/null @@ -1,40 +0,0 @@ -## \file -## \ingroup tutorial_tmva_keras -## \notebook -nodraw -## This tutorial shows how to apply a trained model to new data (regression). 
-## -## \macro_code -## -## \date 2017 -## \author TMVA Team - -from ROOT import TMVA, TFile, TString, gROOT -from array import array -from subprocess import call -from os.path import isfile - -# Setup TMVA -TMVA.Tools.Instance() -TMVA.PyMethodBase.PyInitialize() -reader = TMVA.Reader("Color:!Silent") - -# Load data -data = TFile.Open(str(gROOT.GetTutorialDir()) + '/machine_learning/data/tmva_reg_example.root') -tree = data.Get('TreeR') - -branches = {} -for branch in tree.GetListOfBranches(): - branchName = branch.GetName() - branches[branchName] = array('f', [-999]) - tree.SetBranchAddress(branchName, branches[branchName]) - if branchName != 'fvalue': - reader.AddVariable(branchName, branches[branchName]) - -# Book methods -reader.BookMVA('PyKeras', TString('dataset/weights/TMVARegression_PyKeras.weights.xml')) - -# Print some example regressions -print('Some example regressions:') -for i in range(20): - tree.GetEntry(i) - print('True/MVA value: {}/{}'.format(branches['fvalue'][0],reader.EvaluateMVA('PyKeras'))) diff --git a/tutorials/machine_learning/keras/ClassificationKeras.py b/tutorials/machine_learning/keras/ClassificationKeras.py deleted file mode 100755 index 3baad783676e9..0000000000000 --- a/tutorials/machine_learning/keras/ClassificationKeras.py +++ /dev/null @@ -1,76 +0,0 @@ -## \file -## \ingroup tutorial_tmva_keras -## \notebook -nodraw -## This tutorial shows how to do classification in TMVA with neural networks -## trained with keras. -## -## \macro_code -## -## \date 2017 -## \author TMVA Team - -from ROOT import TMVA, TFile, TCut, gROOT -from subprocess import call -from os.path import isfile - -from tensorflow.keras.models import Sequential -from tensorflow.keras.layers import Dense -from tensorflow.keras.optimizers import SGD - - -def create_model(): - # Generate model - - # Define model - model = Sequential() - model.add(Dense(64, activation='relu', input_dim=4)) - model.add(Dense(2, activation='softmax')) - - # Set loss and optimizer - model.compile(loss='categorical_crossentropy', - optimizer=SGD(learning_rate=0.01), weighted_metrics=['accuracy', ]) - - # Store model to file - model.save('modelClassification.h5') - model.summary() - - -def run(): - with TFile.Open('TMVA_Classification_Keras.root', 'RECREATE') as output, TFile.Open(str(gROOT.GetTutorialDir()) + '/machine_learning/data/tmva_class_example.root') as data: - factory = TMVA.Factory('TMVAClassification', output, - '!V:!Silent:Color:DrawProgressBar:Transformations=D,G:AnalysisType=Classification') - - signal = data.Get('TreeS') - background = data.Get('TreeB') - - dataloader = TMVA.DataLoader('dataset') - for branch in signal.GetListOfBranches(): - dataloader.AddVariable(branch.GetName()) - - dataloader.AddSignalTree(signal, 1.0) - dataloader.AddBackgroundTree(background, 1.0) - dataloader.PrepareTrainingAndTestTree(TCut(''), - 'nTrain_Signal=4000:nTrain_Background=4000:SplitMode=Random:NormMode=NumEvents:!V') - - # Book methods - factory.BookMethod(dataloader, TMVA.Types.kFisher, 'Fisher', - '!H:!V:Fisher:VarTransform=D,G') - factory.BookMethod(dataloader, TMVA.Types.kPyKeras, 'PyKeras', - 'H:!V:VarTransform=D,G:FilenameModel=modelClassification.h5:FilenameTrainedModel=trainedModelClassification.h5:NumEpochs=20:BatchSize=32:LearningRateSchedule=10,0.01;20,0.005') - - # Run training, test and evaluation - factory.TrainAllMethods() - factory.TestAllMethods() - factory.EvaluateAllMethods() - - -if __name__ == "__main__": - # Setup TMVA - TMVA.Tools.Instance() - TMVA.PyMethodBase.PyInitialize() - - # Create 
and store the ML model - create_model() - - # Run TMVA - run() diff --git a/tutorials/machine_learning/keras/GenerateModel.py b/tutorials/machine_learning/keras/GenerateModel.py deleted file mode 100755 index 832d54e410dc2..0000000000000 --- a/tutorials/machine_learning/keras/GenerateModel.py +++ /dev/null @@ -1,62 +0,0 @@ -## \file -## \ingroup tutorial_tmva_keras -## \notebook -nodraw -## This tutorial shows how to define and generate a keras model for use with -## TMVA. -## -## \macro_code -## -## \date 2017 -## \author TMVA Team - -from tensorflow.keras.models import Sequential -from tensorflow.keras.layers import Dense, Activation -from tensorflow.keras.regularizers import l2 -from tensorflow.keras.optimizers import SGD -from tensorflow.keras.utils import plot_model - -# Setup the model here -num_input_nodes = 4 -num_output_nodes = 2 -num_hidden_layers = 1 -nodes_hidden_layer = 64 -l2_val = 1e-5 - -model = Sequential() - -# Hidden layer 1 -# NOTE: Number of input nodes need to be defined in this layer -model.add(Dense(nodes_hidden_layer, activation='relu', kernel_regularizer=l2(l2_val), input_dim=num_input_nodes)) - -# Hidden layer 2 to num_hidden_layers -# NOTE: Here, you can do what you want -for k in range(num_hidden_layers-1): - model.add(Dense(nodes_hidden_layer, activation='relu', kernel_regularizer=l2(l2_val))) - -# Output layer -# NOTE: Use following output types for the different tasks -# Binary classification: 2 output nodes with 'softmax' activation -# Regression: 1 output with any activation ('linear' recommended) -# Multiclass classification: (number of classes) output nodes with 'softmax' activation -model.add(Dense(num_output_nodes, activation='softmax')) - -# Compile model -# NOTE: Use following settings for the different tasks -# Any classification: 'categorical_crossentropy' is recommended loss function -# Regression: 'mean_squared_error' is recommended loss function -model.compile(loss='categorical_crossentropy', optimizer=SGD(learning_rate=0.01), weighted_metrics=['accuracy',]) - -# Save model -model.save('model.h5') - -# Additional information about the model -# NOTE: This is not needed to run the model - -# Print summary -model.summary() - -# Visualize model as graph -try: - plot_model(model, to_file='model.png', show_shapes=True) -except: - print('[INFO] Failed to make model plot') diff --git a/tutorials/machine_learning/keras/MulticlassKeras.py b/tutorials/machine_learning/keras/MulticlassKeras.py deleted file mode 100755 index 358b8197f9bd2..0000000000000 --- a/tutorials/machine_learning/keras/MulticlassKeras.py +++ /dev/null @@ -1,84 +0,0 @@ -## \file -## \ingroup tutorial_tmva_keras -## \notebook -nodraw -## This tutorial shows how to do multiclass classification in TMVA with neural -## networks trained with keras. 
-## -## \macro_code -## -## \date 2017 -## \author TMVA Team - -from ROOT import TMVA, TFile, TCut, gROOT -from os.path import isfile - -from tensorflow.keras.models import Sequential -from tensorflow.keras.layers import Dense -from tensorflow.keras.optimizers import SGD - - -def create_model(): - # Define model - model = Sequential() - model.add(Dense(32, activation='relu', input_dim=4)) - model.add(Dense(4, activation='softmax')) - - # Set loss and optimizer - model.compile(loss='categorical_crossentropy', optimizer=SGD( - learning_rate=0.01), weighted_metrics=['accuracy',]) - - # Store model to file - model.save('modelMultiClass.h5') - model.summary() - - -def run(): - with TFile.Open('TMVA.root', 'RECREATE') as output, TFile.Open('tmva_example_multiple_background.root') as data: - factory = TMVA.Factory('TMVAClassification', output, - '!V:!Silent:Color:DrawProgressBar:Transformations=D,G:AnalysisType=multiclass') - - signal = data.Get('TreeS') - background0 = data.Get('TreeB0') - background1 = data.Get('TreeB1') - background2 = data.Get('TreeB2') - - dataloader = TMVA.DataLoader('dataset') - for branch in signal.GetListOfBranches(): - dataloader.AddVariable(branch.GetName()) - - dataloader.AddTree(signal, 'Signal') - dataloader.AddTree(background0, 'Background_0') - dataloader.AddTree(background1, 'Background_1') - dataloader.AddTree(background2, 'Background_2') - dataloader.PrepareTrainingAndTestTree(TCut(''), - 'SplitMode=Random:NormMode=NumEvents:!V') - - # Book methods - factory.BookMethod(dataloader, TMVA.Types.kFisher, 'Fisher', - '!H:!V:Fisher:VarTransform=D,G') - factory.BookMethod(dataloader, TMVA.Types.kPyKeras, 'PyKeras', - 'H:!V:VarTransform=D,G:FilenameModel=modelMultiClass.h5:FilenameTrainedModel=trainedModelMultiClass.h5:NumEpochs=20:BatchSize=32') - - # Run TMVA - factory.TrainAllMethods() - factory.TestAllMethods() - factory.EvaluateAllMethods() - - -if __name__ == "__main__": - # Generate model - create_model() - - # Setup TMVA - TMVA.Tools.Instance() - TMVA.PyMethodBase.PyInitialize() - - # Load data - if not isfile('tmva_example_multiple_background.root'): - createDataMacro = str(gROOT.GetTutorialDir()) + '/machine_learning/createData.C' - print(createDataMacro) - gROOT.ProcessLine('.L {}'.format(createDataMacro)) - gROOT.ProcessLine('create_MultipleBackground(4000)') - - # Run TMVA - run() diff --git a/tutorials/machine_learning/keras/RegressionKeras.py b/tutorials/machine_learning/keras/RegressionKeras.py deleted file mode 100755 index dce84773d8b7f..0000000000000 --- a/tutorials/machine_learning/keras/RegressionKeras.py +++ /dev/null @@ -1,77 +0,0 @@ -## \file -## \ingroup tutorial_tmva_keras -## \notebook -nodraw -## This tutorial shows how to do regression in TMVA with neural networks -## trained with keras. 
-## -## \macro_code -## -## \date 2017 -## \author TMVA Team - -from ROOT import TMVA, TFile, TCut, gROOT -from subprocess import call -from os.path import isfile - -from tensorflow.keras.models import Sequential -from tensorflow.keras.layers import Dense -from tensorflow.keras.optimizers import SGD - - -def create_model(): - # Define model - model = Sequential() - model.add(Dense(64, activation='tanh', input_dim=2)) - model.add(Dense(1, activation='linear')) - - # Set loss and optimizer - model.compile(loss='mean_squared_error', optimizer=SGD( - learning_rate=0.01), weighted_metrics=[]) - - # Store model to file - model.save('modelRegression.h5') - model.summary() - - -def run(): - - with TFile.Open('TMVA_Regression_Keras.root', 'RECREATE') as output, TFile.Open(str(gROOT.GetTutorialDir()) + '/machine_learning/data/tmva_reg_example.root') as data: - factory = TMVA.Factory('TMVARegression', output, - '!V:!Silent:Color:DrawProgressBar:Transformations=D,G:AnalysisType=Regression') - - tree = data.Get('TreeR') - - dataloader = TMVA.DataLoader('dataset') - for branch in tree.GetListOfBranches(): - name = branch.GetName() - if name != 'fvalue': - dataloader.AddVariable(name) - dataloader.AddTarget('fvalue') - - dataloader.AddRegressionTree(tree, 1.0) - # use only 1000 events since evaluation is very slow (especially on MacOS). Increase it to get meaningful results - dataloader.PrepareTrainingAndTestTree(TCut(''), - 'nTrain_Regression=1000:SplitMode=Random:NormMode=NumEvents:!V') - - # Book methods - factory.BookMethod(dataloader, TMVA.Types.kPyKeras, 'PyKeras', - 'H:!V:VarTransform=D,G:FilenameModel=modelRegression.h5:FilenameTrainedModel=trainedModelRegression.h5:NumEpochs=20:BatchSize=32') - factory.BookMethod(dataloader, TMVA.Types.kBDT, 'BDTG', - '!H:!V:VarTransform=D,G:NTrees=1000:BoostType=Grad:Shrinkage=0.1:UseBaggedBoost:BaggedSampleFraction=0.5:nCuts=20:MaxDepth=4') - - # Run TMVA - factory.TrainAllMethods() - factory.TestAllMethods() - factory.EvaluateAllMethods() - - -if __name__ == "__main__": - # Setup TMVA - TMVA.Tools.Instance() - TMVA.PyMethodBase.PyInitialize() - - # Generate model - create_model() - - # Run TMVA - run() diff --git a/tutorials/machine_learning/keras/index.md b/tutorials/machine_learning/keras/index.md deleted file mode 100644 index 3f5b7d4e01462..0000000000000 --- a/tutorials/machine_learning/keras/index.md +++ /dev/null @@ -1,3 +0,0 @@ -\defgroup tutorial_tmva_keras TMVA Keras tutorials -\ingroup tutorial_ml -\brief Example code which illustrates how to use keras with the python interface of TMVA