diff --git a/examples/input.nn.recommended b/examples/input.nn.recommended
index e53934e0f..a0030bf52 100644
--- a/examples/input.nn.recommended
+++ b/examples/input.nn.recommended
@@ -35,6 +35,7 @@ parallel_mode 0 # Training parallelization used (
 jacobian_mode           1  # Jacobian computation mode (0 = Summation to single gradient, 1 = Per-task summed gradient, 2 = Full Jacobian).
 update_strategy         0  # Update strategy (0 = Combined, 1 = Per-element).
 selection_mode          2  # Update candidate selection mode (0 = Random, 1 = Sort, 2 = Threshold).
+decoupling_type         0  # Kalman filter decoupling type (0 = GEKF, 1 = ED-GEKF, 2 = Layer, 3 = NDEKF, 4 = FDEKF).
 task_batch_size_energy  1  # Number of energy update candidates prepared per task for each update (0 = Entire training set).
 task_batch_size_force   1  # Number of force update candidates prepared per task for each update (0 = Entire training set).
 memorize_symfunc_results   # Keep symmetry function results in memory.
diff --git a/examples/nnp-train/Cu2S_PBE/input.nn b/examples/nnp-train/Cu2S_PBE/input.nn
index b4aa9333e..23eccf6db 100644
--- a/examples/nnp-train/Cu2S_PBE/input.nn
+++ b/examples/nnp-train/Cu2S_PBE/input.nn
@@ -44,6 +44,7 @@ parallel_mode 0 # Training parallelization used (
 jacobian_mode           1  # Jacobian computation mode (0 = Summation to single gradient, 1 = Per-task summed gradient, 2 = Full Jacobian).
 update_strategy         0  # Update strategy (0 = Combined, 1 = Per-element).
 selection_mode          2  # Update candidate selection mode (0 = Random, 1 = Sort, 2 = Threshold).
+decoupling_type         0  # Kalman filter decoupling type (0 = GEKF, 1 = ED-GEKF, 2 = Layer, 3 = NDEKF, 4 = FDEKF).
 task_batch_size_energy  1  # Number of energy update candidates prepared per task for each update (0 = Entire training set).
 task_batch_size_force   1  # Number of force update candidates prepared per task for each update (0 = Entire training set).
 memorize_symfunc_results   # Keep symmetry function results in memory.
diff --git a/examples/nnp-train/H2O_RPBE-D3/input.nn b/examples/nnp-train/H2O_RPBE-D3/input.nn
index b2666eecd..05b3fcacb 100644
--- a/examples/nnp-train/H2O_RPBE-D3/input.nn
+++ b/examples/nnp-train/H2O_RPBE-D3/input.nn
@@ -44,6 +44,7 @@ parallel_mode 0 # Training parallelization used (
 jacobian_mode           1  # Jacobian computation mode (0 = Summation to single gradient, 1 = Per-task summed gradient, 2 = Full Jacobian).
 update_strategy         0  # Update strategy (0 = Combined, 1 = Per-element).
 selection_mode          2  # Update candidate selection mode (0 = Random, 1 = Sort, 2 = Threshold).
+decoupling_type         0  # Kalman filter decoupling type (0 = GEKF, 1 = ED-GEKF, 2 = Layer, 3 = NDEKF, 4 = FDEKF).
 task_batch_size_energy  1  # Number of energy update candidates prepared per task for each update (0 = Entire training set).
 task_batch_size_force   1  # Number of force update candidates prepared per task for each update (0 = Entire training set).
 memorize_symfunc_results   # Keep symmetry function results in memory.
diff --git a/examples/nnp-train/LJ/input.nn b/examples/nnp-train/LJ/input.nn
index d42118f4b..2a1812341 100644
--- a/examples/nnp-train/LJ/input.nn
+++ b/examples/nnp-train/LJ/input.nn
@@ -34,6 +34,7 @@ parallel_mode 0 # Training parallelization used (
 jacobian_mode           1  # Jacobian computation mode (0 = Summation to single gradient, 1 = Per-task summed gradient, 2 = Full Jacobian).
 update_strategy         0  # Update strategy (0 = Combined, 1 = Per-element).
 selection_mode          2  # Update candidate selection mode (0 = Random, 1 = Sort, 2 = Threshold).
+decoupling_type         0  # Kalman filter decoupling type (0 = GEKF, 1 = ED-GEKF, 2 = Layer, 3 = NDEKF, 4 = FDEKF).
 task_batch_size_energy  1  # Number of energy update candidates prepared per task for each update (0 = Entire training set).
 task_batch_size_force   1  # Number of force update candidates prepared per task for each update (0 = Entire training set).
 memorize_symfunc_results   # Keep symmetry function results in memory.
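Background for the new decoupling_type keyword: it selects how the Kalman filter error covariance matrix P is partitioned into independent diagonal blocks (decoupling groups); the implementation follows in src/libnnptrain/KalmanFilter.cpp further below. The stored covariance size grows as

    sizeP = sum_g (n_g)^2

where n_g is the number of weights in group g (this is exactly the sizeP accumulated in KalmanFilter::setupDecoupling()). A purely hypothetical size illustration: a network with 1000 weights needs 10^6 covariance entries under GEKF (one global group), 25 * 40^2 = 40000 entries (4 %) when split into 25 node groups of 40 connections each (NDEKF), and only 1000 entries under FDEKF (fully diagonal P).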
diff --git a/src/libnnp/Mode.cpp b/src/libnnp/Mode.cpp
index 475ec8ff3..bd7359734 100644
--- a/src/libnnp/Mode.cpp
+++ b/src/libnnp/Mode.cpp
@@ -884,7 +884,9 @@ void Mode::setupNeuralNetworkWeights(string const& fileNameFormat)
                 weights.push_back(atof(splitLine.at(0).c_str()));
             }
         }
-        it->neuralNetwork->setConnections(&(weights.front()));
+        // Attention: need alternative (old) ordering scheme here for
+        // backward compatibility!
+        it->neuralNetwork->setConnectionsAO(&(weights.front()));
 
         file.close();
     }
diff --git a/src/libnnp/NeuralNetwork.cpp b/src/libnnp/NeuralNetwork.cpp
index e7b7808c6..ea812a336 100644
--- a/src/libnnp/NeuralNetwork.cpp
+++ b/src/libnnp/NeuralNetwork.cpp
@@ -80,12 +80,26 @@ NeuralNetwork(int numLayers,
         weightOffset[i] = weightOffset[i-1]
                         + (layers[i-1].numNeurons + 1) * layers[i].numNeurons;
     }
+#ifndef ALTERNATIVE_WEIGHT_ORDERING
+    biasOffset = new int*[numLayers-1];
+    for (int i = 0; i < numLayers-1; i++)
+    {
+        biasOffset[i] = new int[layers[i+1].numNeurons];
+        for (int j = 0; j < layers[i+1].numNeurons; j++)
+        {
+            biasOffset[i][j] = weightOffset[i]
+                             + (layers[i+1].numNeuronsPrevLayer + 1)
+                             * (j + 1) - 1;
+        }
+    }
+#else
     biasOffset = new int[numLayers-1];
     for (int i = 0; i < numLayers-1; i++)
     {
         biasOffset[i] = weightOffset[i]
                       + layers[i+1].numNeurons * layers[i].numNeurons;
     }
+#endif
     biasOnlyOffset = new int[numLayers-1];
     biasOnlyOffset[0] = 0;
     for (int i = 1; i < numLayers-1; i++)
@@ -106,7 +120,15 @@ NeuralNetwork::~NeuralNetwork()
     }
     delete[] layers;
     delete[] weightOffset;
+#ifndef ALTERNATIVE_WEIGHT_ORDERING
+    for (int i = 0; i < numLayers - 1; i++)
+    {
+        delete[] biasOffset[i];
+    }
+    delete[] biasOffset;
+#else
     delete[] biasOffset;
+#endif
     delete[] biasOnlyOffset;
 }
@@ -148,6 +170,27 @@ void NeuralNetwork::setConnections(double const* const& connections)
 {
     int count = 0;
 
+    for (int i = 1; i < numLayers; i++)
+    {
+        for (int j = 0; j < layers[i].numNeurons; j++)
+        {
+            for (int k = 0; k < layers[i].numNeuronsPrevLayer; k++)
+            {
+                layers[i].neurons[j].weights[k] = connections[count];
+                count++;
+            }
+            layers[i].neurons[j].bias = connections[count];
+            count++;
+        }
+    }
+
+    return;
+}
+
+void NeuralNetwork::setConnectionsAO(double const* const& connections)
+{
+    int count = 0;
+
     for (int i = 1; i < numLayers; i++)
     {
         for (int j = 0; j < layers[i].numNeuronsPrevLayer; j++)
@@ -172,6 +215,27 @@ void NeuralNetwork::getConnections(double* connections) const
 {
     int count = 0;
 
+    for (int i = 1; i < numLayers; i++)
+    {
+        for (int j = 0; j < layers[i].numNeurons; j++)
+        {
+            for (int k = 0; k < layers[i].numNeuronsPrevLayer; k++)
+            {
+                connections[count] = layers[i].neurons[j].weights[k];
+                count++;
+            }
+            connections[count] = layers[i].neurons[j].bias;
+            count++;
+        }
+    }
+
+    return;
+}
+
+void NeuralNetwork::getConnectionsAO(double* connections) const
+{
+    int count = 0;
+
     for (int i = 1; i < numLayers; i++)
     {
         for (int j = 0; j < layers[i].numNeuronsPrevLayer; j++)
@@ -428,6 +492,52 @@ void NeuralNetwork::calculateDEdG(double *dEdG) const
 
 void NeuralNetwork::calculateDEdc(double* dEdc) const
 {
+#ifndef ALTERNATIVE_WEIGHT_ORDERING
+    int count = 0;
+
+    for (int i = 0; i < numConnections; i++)
+    {
+        dEdc[i] = 0.0;
+    }
+
+    for (int i = 0; i < outputLayer->numNeurons; i++)
+    {
+        dEdc[biasOffset[numLayers-2][i]] = outputLayer->neurons[i].dfdx;
+        if (normalizeNeurons)
+        {
+            dEdc[biasOffset[numLayers-2][i]] /=
+                outputLayer->numNeuronsPrevLayer;
+        }
+    }
+
+    for (int i = numLayers - 2; i >= 0; i--)
+    {
+        count = 0;
+        for (int j = 0; j < layers[i+1].numNeurons; j++)
+        {
+            for (int k = 0; k < layers[i].numNeurons; k++)
+            {
+                dEdc[weightOffset[i]+count] = dEdc[biasOffset[i][j]]
+                                            * layers[i].neurons[k].value;
+                count++;
+                if (i >= 1)
+                {
+                    dEdc[biasOffset[i-1][k]] += dEdc[biasOffset[i][j]]
+                        * layers[i+1].neurons[j].weights[k]
+                        * layers[i].neurons[k].dfdx;
+                }
+            }
+            count++;
+        }
+        if (normalizeNeurons && i >= 1)
+        {
+            for (int k = 0; k < layers[i].numNeurons; k++)
+            {
+                dEdc[biasOffset[i-1][k]] /= layers[i].numNeuronsPrevLayer;
+            }
+        }
+    }
+#else
     int count = 0;
 
     for (int i = 0; i < numConnections; i++)
@@ -468,6 +578,7 @@ void NeuralNetwork::calculateDEdc(double* dEdc) const
             }
         }
     }
+#endif
 
     return;
 }
@@ -512,6 +623,67 @@ void NeuralNetwork::calculateDFdc(double* dFdc,
 }
 
 void NeuralNetwork::writeConnections(std::ofstream& file) const
+{
+    // File header.
+    vector<string> title;
+    vector<string> colName;
+    vector<string> colInfo;
+    vector<size_t> colSize;
+    title.push_back("Neural network connection values (weights and biases).");
+    colSize.push_back(24);
+    colName.push_back("connection");
+    colInfo.push_back("Neural network connection value.");
+    colSize.push_back(1);
+    colName.push_back("t");
+    colInfo.push_back("Connection type (a = weight, b = bias).");
+    colSize.push_back(9);
+    colName.push_back("index");
+    colInfo.push_back("Index enumerating weights.");
+    colSize.push_back(5);
+    colName.push_back("l_s");
+    colInfo.push_back("Starting point layer (end point layer for biases).");
+    colSize.push_back(5);
+    colName.push_back("n_s");
+    colInfo.push_back("Starting point neuron in starting layer (end point "
+                      "neuron for biases).");
+    colSize.push_back(5);
+    colName.push_back("l_e");
+    colInfo.push_back("End point layer.");
+    colSize.push_back(5);
+    colName.push_back("n_e");
+    colInfo.push_back("End point neuron in end layer.");
+    appendLinesToFile(file,
+                      createFileHeader(title, colSize, colName, colInfo));
+
+    int count = 0;
+    for (int i = 1; i < numLayers; i++)
+    {
+        for (int j = 0; j < layers[i].numNeurons; j++)
+        {
+            for (int k = 0; k < layers[i].numNeuronsPrevLayer; k++)
+            {
+                count++;
+                file << strpr("%24.16E a %9d %5d %5d %5d %5d\n",
+                              layers[i].neurons[j].weights[k],
+                              count,
+                              i - 1,
+                              k + 1,
+                              i,
+                              j + 1);
+            }
+            count++;
+            file << strpr("%24.16E b %9d %5d %5d\n",
+                          layers[i].neurons[j].bias,
+                          count,
+                          i,
+                          j + 1);
+        }
+    }
+
+    return;
+}
+
+void NeuralNetwork::writeConnectionsAO(std::ofstream& file) const
 {
     // File header.
     vector<string> title;
@@ -645,6 +817,67 @@ void NeuralNetwork::calculateD2EdGdc(int index,
                                      double const* const& dEdb,
                                      double*              d2EdGdc) const
 {
+#ifndef ALTERNATIVE_WEIGHT_ORDERING
+    int count = 0;
+
+    for (int i = 0; i < outputLayer->numNeurons; i++)
+    {
+        d2EdGdc[biasOffset[numLayers-2][i]] = outputLayer->neurons[i].d2fdx2
+                                            * outputLayer->neurons[i].dxdG;
+        if (normalizeNeurons)
+        {
+            d2EdGdc[biasOffset[numLayers-2][i]] /=
+                outputLayer->numNeuronsPrevLayer;
+        }
+    }
+
+    for (int i = numLayers-2; i >= 0; i--)
+    {
+        count = 0;
+        for (int j = 0; j < layers[i+1].numNeurons; j++)
+        {
+            for (int k = 0; k < layers[i].numNeurons; k++)
+            {
+                if (i == 0)
+                {
+                    d2EdGdc[weightOffset[i]+count] = d2EdGdc[biasOffset[i][j]]
+                        * layers[i].neurons[k].value;
+                    if (k == index)
+                    {
+                        d2EdGdc[weightOffset[i]+count] +=
+                            dEdb[biasOnlyOffset[i]+j];
+                    }
+                }
+                else
+                {
+                    d2EdGdc[weightOffset[i]+count] = d2EdGdc[biasOffset[i][j]]
+                        * layers[i].neurons[k].value
+                        + dEdb[biasOnlyOffset[i]+j] * layers[i].neurons[k].dfdx
+                        * layers[i].neurons[k].dxdG;
+                }
+                count++;
+                if (i >= 1)
+                {
+                    d2EdGdc[biasOffset[i-1][k]] +=
+                        layers[i+1].neurons[j].weights[k]
+                        * (d2EdGdc[biasOffset[i][j]]
+                           * layers[i].neurons[k].dfdx
+                           + dEdb[biasOnlyOffset[i]+j]
+                           * layers[i].neurons[k].d2fdx2
+                           * layers[i].neurons[k].dxdG);
+                }
+            }
+            count++;
+        }
+        for (int k = 0; k < layers[i].numNeurons; k++)
+        {
+            if (normalizeNeurons && i >= 1)
+            {
+                d2EdGdc[biasOffset[i-1][k]] /= layers[i].numNeuronsPrevLayer;
+            }
+        }
+    }
+#else
     int count = 0;
 
     for (int i = 0; i < outputLayer->numNeurons; i++)
@@ -699,6 +932,7 @@ void NeuralNetwork::calculateD2EdGdc(int index,
             }
         }
     }
+#endif
 
     return;
 }
@@ -921,6 +1155,45 @@ void NeuralNetwork::getNeuronStatistics(long* count,
     return;
 }
 
+vector<pair<size_t, size_t>> NeuralNetwork::getLayerLimits() const
+{
+    vector<pair<size_t, size_t>> limits;
+
+    for (int i = 0; i < numLayers - 2; ++i)
+    {
+        limits.push_back(make_pair(weightOffset[i],
+                                   weightOffset[i + 1] - 1));
+    }
+    limits.push_back(make_pair(weightOffset[numLayers - 2],
+                               numConnections - 1));
+
+    return limits;
+}
+
+vector<pair<size_t, size_t>> NeuralNetwork::getNeuronLimits() const
+{
+    vector<pair<size_t, size_t>> limits;
+
+#ifndef ALTERNATIVE_WEIGHT_ORDERING
+    for (int i = 0; i < numLayers - 1; ++i)
+    {
+        limits.push_back(make_pair(weightOffset[i],
+                                   biasOffset[i][0]));
+        for (int j = 0; j < layers[i+1].numNeurons - 1; ++j)
+        {
+            limits.push_back(make_pair(biasOffset[i][j] + 1,
+                                       biasOffset[i][j+1]));
+        }
+    }
+#else
+    throw runtime_error("ERROR: Neuron boundaries not available with "
+                        "alternative (old) weight memory layout, recompile "
+                        "without -DALTERNATIVE_WEIGHT_ORDERING.\n");
+#endif
+
+    return limits;
+}
+
 /*
 void NeuralNetwork::writeStatus(int element, int epoch)
 {
@@ -969,7 +1242,15 @@ long NeuralNetwork::getMemoryUsage()
     int numNeurons = getNumNeurons();
 
     mem += (numLayers - 1) * sizeof(int);  // weightOffset
+#ifndef ALTERNATIVE_WEIGHT_ORDERING
+    mem += (numLayers - 1) * sizeof(int*); // biasOffset
+    for (int i = 0; i < numLayers - 1; i++)
+    {
+        mem += layers[i+1].numNeurons * sizeof(int);
+    }
+#else
     mem += (numLayers - 1) * sizeof(int);  // biasOffset
+#endif
     mem += (numLayers - 1) * sizeof(int);  // biasOnlyOffset
     mem += numLayers * sizeof(Layer);      // layers
     mem += numNeurons * sizeof(Neuron);    // neurons
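To make the two orderings concrete, the following standalone sketch (hypothetical 2-2-1 network, i.e. two inputs, two hidden neurons, one output; not part of the library) prints both index sequences, using the convention from the doxygen comments in NeuralNetwork.h below, where a[l-1,l]_jk connects source neuron j to target neuron k:

    #include <cstdio>

    int main()
    {
        int const n[3] = {2, 2, 1}; // hypothetical 2-2-1 architecture

        std::printf("setConnections() (new, per-neuron) order:\n");
        for (int i = 1; i < 3; i++)              // target layer i
        {
            for (int k = 0; k < n[i]; k++)       // target neuron k
            {
                for (int j = 0; j < n[i-1]; j++) // source neuron j
                {
                    std::printf("a%d%d_%d%d ", i - 1, i, j, k);
                }
                std::printf("b%d_%d  ", i, k);   // bias right after weights
            }
        }
        std::printf("\n");

        std::printf("setConnectionsAO() (old, per-layer) order:\n");
        for (int i = 1; i < 3; i++)
        {
            for (int j = 0; j < n[i-1]; j++)     // all weights of layer first
            {
                for (int k = 0; k < n[i]; k++)
                {
                    std::printf("a%d%d_%d%d ", i - 1, i, j, k);
                }
            }
            for (int k = 0; k < n[i]; k++)       // ... then all biases
            {
                std::printf("b%d_%d  ", i, k);
            }
        }
        std::printf("\n");

        return 0;
    }

Expected output: the new order groups each neuron's connections ("a01_00 a01_10 b1_0  a01_01 a01_11 b1_1  a12_00 a12_10 b2_0"), the old order groups all weights before all biases per layer ("a01_00 a01_01 a01_10 a01_11 b1_0 b1_1  a12_00 a12_10 b2_0").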
diff --git a/src/libnnp/NeuralNetwork.h b/src/libnnp/NeuralNetwork.h
index c56c58e9f..8cbef0cfd 100644
--- a/src/libnnp/NeuralNetwork.h
+++ b/src/libnnp/NeuralNetwork.h
@@ -17,8 +17,10 @@
 #ifndef NEURALNETWORK_H
 #define NEURALNETWORK_H
 
+#include <cstddef> // std::size_t
 #include <fstream> // std::ofstream
 #include <string>  // std::string
+#include <utility> // std::pair
 #include <vector>  // std::vector
 
 namespace nnp
@@ -152,29 +154,70 @@ class NeuralNetwork
      * @f[
      * \underbrace{
      *   \overbrace{
-     *     a^{01}_{00}, \ldots, a^{01}_{0m_1},
-     *     a^{01}_{10}, \ldots, a^{01}_{1m_1},
+     *     a^{01}_{00}, \ldots, a^{01}_{n_00}, b^{1}_{0}
+     *   }^{\text{Neuron}^{1}_{0}}
+     *   \overbrace{
+     *     a^{01}_{01}, \ldots, a^{01}_{n_01}, b^{1}_{1}
+     *   }^{\text{Neuron}^{1}_{1}}
+     *   \ldots,
+     *   \overbrace{
+     *     a^{01}_{0n_1}, \ldots, a^{01}_{n_0n_1}, b^{1}_{n_1}
+     *   }^{\text{Neuron}^{1}_{n_1}}
+     * }_{\text{Layer } 0 \rightarrow 1},
+     * \underbrace{
+     *   a^{12}_{00}, \ldots, b^{2}_{n_2}
+     * }_{\text{Layer } 1 \rightarrow 2},
+     * \ldots,
+     * \underbrace{
+     *   a^{p-1,p}_{00}, \ldots, b^{p}_{n_p}
+     * }_{\text{Layer } p-1 \rightarrow p}
+     * @f]
+     * where @f$a^{i-1, i}_{jk}@f$ is the weight connecting neuron
+     * @f$j@f$ in layer @f$i-1@f$ to neuron @f$k@f$ in layer
+     * @f$i@f$ and @f$b^{i}_{k}@f$ is the bias assigned to neuron
+     * @f$k@f$ in layer @f$i@f$.
+     *
+     * This ordering scheme is used internally to store weights in memory.
+     * Because all weights and biases connected to a neuron are aligned in a
+     * contiguous block, this memory layout simplifies the implementation of
+     * node-decoupled Kalman filter training (NDEKF). However, for backward
+     * compatibility reasons this layout is NOT used for storing weights on
+     * disk (see setConnectionsAO()).
+     */
+    void                     setConnections(double const* const& connections);
+    /** Set neural network weights and biases (alternative ordering).
+     *
+     * @param[in] connections One-dimensional array with neural network
+     *                        connections in the following order:
+     * @f[
+     * \underbrace{
+     *   \overbrace{
+     *     a^{01}_{00}, \ldots, a^{01}_{0n_1},
+     *     a^{01}_{10}, \ldots, a^{01}_{1n_1},
      *     \ldots,
-     *     a^{01}_{n_00}, \ldots, a^{01}_{n_0m_1},
+     *     a^{01}_{n_00}, \ldots, a^{01}_{n_0n_1},
      *   }^{\text{Weights}}
      *   \overbrace{
-     *     b^{1}_{0}, \ldots, b^{1}_{m_1}
+     *     b^{1}_{0}, \ldots, b^{1}_{n_1}
      *   }^{\text{Biases}}
      * }_{\text{Layer } 0 \rightarrow 1},
      * \underbrace{
-     *   a^{12}_{00}, \ldots, b^{2}_{m_2}
+     *   a^{12}_{00}, \ldots, b^{2}_{n_2}
      * }_{\text{Layer } 1 \rightarrow 2},
      * \ldots,
      * \underbrace{
-     *   a^{p-1,p}_{00}, \ldots, b^{p}_{m_p}
+     *   a^{p-1,p}_{00}, \ldots, b^{p}_{n_p}
      * }_{\text{Layer } p-1 \rightarrow p}
      * @f]
      * where @f$a^{i-1, i}_{jk}@f$ is the weight connecting neuron
      * @f$j@f$ in layer @f$i-1@f$ to neuron @f$k@f$ in layer
      * @f$i@f$ and @f$b^{i}_{k}@f$ is the bias assigned to neuron
      * @f$k@f$ in layer @f$i@f$.
+     *
+     * This memory layout is used when storing weights on disk.
      */
-    void                     setConnections(double const* const& connections);
+    void                     setConnectionsAO(
+                                           double const* const& connections);
     /** Get neural network weights and biases.
      *
      * @param[out] connections One-dimensional array with neural network
@@ -182,6 +225,13 @@ class NeuralNetwork
      *                         connections (same order as described in
      *                         #setConnections())
      */
     void                     getConnections(double* connections) const;
+    /** Get neural network weights and biases (alternative ordering).
+     *
+     * @param[out] connections One-dimensional array with neural network
+     *                         connections (same order as described in
+     *                         #setConnectionsAO())
+     */
+    void                     getConnectionsAO(double* connections) const;
     /** Initialize connections with random numbers.
      *
      * @param[in] seed Random number generator seed.
@@ -211,14 +261,14 @@ class NeuralNetwork
                                               ModificationScheme modificationScheme,
                                               double parameter1,
                                               double parameter2);
-    /** Set neural network input layer node values.
+    /** Set neural network input layer neuron values.
      *
-     * @param[in] input Input layer node values.
+     * @param[in] input Input layer neuron values.
      */
     void                     setInput(double const* const& input) const;
-    /** Get neural network output layer node values.
+    /** Get neural network output layer neuron values.
      *
-     * @param[out] output Output layer node values.
+     * @param[out] output Output layer neuron values.
      */
     void                     getOutput(double* output) const;
     /** Propagate input information through all layers.
@@ -295,10 +345,20 @@ class NeuralNetwork
     void                     calculateDFdc(double*              dFdc,
                                            double const* const& dGdxyz) const;
     /** Write connections to file.
+     *
+     * __CAUTION__: For compatibility reasons this format is NOT used for
+     * storing NN weights to disk!
      *
      * @param[in,out] file File stream to write to.
      */
     void                     writeConnections(std::ofstream& file) const;
+    /** Write connections to file (alternative ordering).
+     *
+     * This weight ordering is used when writing weights to disk.
+     *
+     * @param[in,out] file File stream to write to.
+     */
+    void                     writeConnectionsAO(std::ofstream& file) const;
     /** Return gathered neuron statistics.
      *
      * @param[out] count Number of neuron output value calculations.
@@ -333,6 +393,24 @@ class NeuralNetwork
      * Counters and summation variables for neuron statistics are reset.
      */
     void                     resetNeuronStatistics();
+    /** Get layer limits in combined weight vector (for Kalman filter
+     * decoupling setup).
+     *
+     * @return Vector with layer limits as pair (begin, end),
+     *         (numLayers - 1) points.
+     */
+    std::vector<std::pair<std::size_t, std::size_t>>
+                             getLayerLimits() const;
+    /** Get neuron limits in combined weight vector (for Kalman filter
+     * decoupling setup).
+     *
+     * @return Vector with neuron limits as pair (begin, end),
+     *         (numNeurons - inputLayer->numNeurons) points.
+     */
+    std::vector<std::pair<std::size_t, std::size_t>>
+                             getNeuronLimits() const;
     //void writeStatus(int, int);
     long                     getMemoryUsage();
     /** Print neural network architecture.
@@ -404,8 +482,13 @@ class NeuralNetwork
     int          numHiddenLayers;
     /// Offset address of weights per layer in combined weights+bias array.
     int*         weightOffset;
+#ifndef ALTERNATIVE_WEIGHT_ORDERING
+    /// Offset address of each neuron per layer in combined weights+bias array.
+    int**        biasOffset;
+#else
     /// Offset address of biases per layer in combined weights+bias array.
     int*         biasOffset;
+#endif
     /// Offset address of biases per layer in bias only array.
     int*         biasOnlyOffset;
     /// Pointer to input layer.
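A note on the offset arithmetic behind the new biasOffset array (taken from the NeuralNetwork constructor above): in the new ordering every neuron of layer i+1 owns a contiguous block of n_i + 1 entries, namely its n_i incoming weights followed by its bias, where n_i is the number of neurons in the previous layer. The bias address therefore is

    biasOffset[i][j] = weightOffset[i] + (n_i + 1) * (j + 1) - 1

Worked check for n_i = 2: the blocks read (w, w, b)(w, w, b)..., placing the biases at relative offsets 2, 5, 8, ..., which the formula reproduces for j = 0, 1, 2, ...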
diff --git a/src/libnnp/Settings.cpp b/src/libnnp/Settings.cpp
index 68c0638d8..7f6a2c2ca 100644
--- a/src/libnnp/Settings.cpp
+++ b/src/libnnp/Settings.cpp
@@ -73,6 +73,7 @@ map<string, string> const createKnownKeywordsMap()
     m["jacobian_mode"          ] = "";
     m["update_strategy"        ] = "";
     m["selection_mode"         ] = "";
+    m["decoupling_type"        ] = "";
     m["task_batch_size_energy" ] = "";
     m["task_batch_size_force"  ] = "";
     m["gradient_type"          ] = "";
diff --git a/src/libnnptrain/KalmanFilter.cpp b/src/libnnptrain/KalmanFilter.cpp
index ab55a8419..5d02b44c2 100644
--- a/src/libnnptrain/KalmanFilter.cpp
+++ b/src/libnnptrain/KalmanFilter.cpp
@@ -16,9 +16,11 @@
 #include "KalmanFilter.h"
 #include "utility.h"
+#include "mpi-extra.h"
 
 #include <cmath>
 #include <cstddef>
 #include <stdexcept>
+#include <map> // std::map
 
 using namespace Eigen;
 using namespace std;
@@ -27,8 +29,11 @@ using namespace nnp;
 KalmanFilter::KalmanFilter(size_t const sizeState,
                            KalmanType const type) :
     Updater(sizeState),
+    myRank         (0    ),
+    numProcs       (0    ),
     sizeObservation(0    ),
     numUpdates     (0    ),
+    sizeP          (0    ),
     epsilon        (0.0  ),
     q              (0.0  ),
     q0             (0.0  ),
@@ -62,17 +67,130 @@ KalmanFilter::KalmanFilter(size_t const sizeState,
     w  = new Map<VectorXd>(0, sizeState);
     xi = new Map<VectorXd const>(0, sizeObservation);
     H  = new Map<MatrixXd const>(0, sizeState, sizeObservation);
-    P.resize(sizeState, sizeState);
-    P.setIdentity();
-    // Prevent problems with unallocated K when log starts.
-    K.resize(sizeState, sizeObservation);
-    K.setZero();
 }
 
 KalmanFilter::~KalmanFilter()
 {
 }
 
+void KalmanFilter::setupMPI(MPI_Comm* communicator)
+{
+    MPI_Comm_dup(*communicator, &comm);
+    MPI_Comm_rank(comm, &myRank);
+    MPI_Comm_size(comm, &numProcs);
+
+    return;
+}
+
+void KalmanFilter::setupDecoupling(vector<pair<size_t, size_t>> groupLimits)
+{
+    this->groupLimits = groupLimits;
+
+    // Check consistency of decoupling group limits.
+    if (groupLimits.at(0).first != 0)
+    {
+        auto const& l = groupLimits.at(0);
+        throw runtime_error(strpr("ERROR: Inconsistent decoupling group "
+                                  "limits, first group must start with index "
+                                  "0 (is %zu).\n",
+                                  l.first));
+    }
+    for (size_t i = 0; i < groupLimits.size(); ++i)
+    {
+        auto const& l = groupLimits.at(i);
+        if (l.first > l.second)
+        {
+            throw runtime_error(
+                strpr("ERROR: Inconsistent decoupling group limits, "
+                      "group %zu: start %zu > end %zu.\n",
+                      i, l.first, l.second));
+        }
+    }
+    for (size_t i = 0; i < groupLimits.size() - 1; ++i)
+    {
+        auto const& l = groupLimits.at(i);
+        auto const& r = groupLimits.at(i + 1);
+        if (l.second + 1 != r.first)
+        {
+            throw runtime_error(
+                strpr("ERROR: Inconsistent decoupling group limits, "
+                      "group %zu end %zu + 1 != group %zu start %zu.\n",
+                      i, l.second, i + 1, r.first));
+        }
+    }
+    if (groupLimits.back().second != sizeState - 1)
+    {
+        auto const& l = groupLimits.back();
+        throw runtime_error(strpr("ERROR: Inconsistent decoupling group "
+                                  "limits, last group must end with index "
+                                  "%zu (is %zu).\n",
+                                  sizeState - 1,
+                                  l.second));
+    }
+
+    // Distribute groups to MPI procs.
+    numGroupsPerProc.resize(numProcs, 0);
+    groupOffsetPerProc.resize(numProcs, 0);
+    int quotient  = groupLimits.size() / numProcs;
+    int remainder = groupLimits.size() % numProcs;
+    int groupSum  = 0;
+    for (int i = 0; i < numProcs; ++i)
+    {
+        numGroupsPerProc.at(i) = quotient;
+        if (remainder > 0 && i < remainder)
+        {
+            numGroupsPerProc.at(i)++;
+        }
+        groupOffsetPerProc.at(i) = groupSum;
+        groupSum += numGroupsPerProc.at(i);
+    }
+
+    // Compute state vector segment length and offset for each processor.
+    for (int i = 0; i < numProcs - 1; ++i)
+    {
+        sizeStatePerProc.push_back(
+            groupLimits.at(groupOffsetPerProc.at(i+1)).first -
+            groupLimits.at(groupOffsetPerProc.at(i)).first);
+        offsetStatePerProc.push_back(
+            groupLimits.at(groupOffsetPerProc.at(i)).first);
+    }
+    sizeStatePerProc.push_back(
+        groupLimits.back().second + 1 -
+        groupLimits.at(groupOffsetPerProc.at(numProcs - 1)).first);
+    offsetStatePerProc.push_back(
+        groupLimits.at(groupOffsetPerProc.at(numProcs - 1)).first);
+
+    // Store group index and segment length for this processor.
+    for (int i = 0; i < numGroupsPerProc.at(myRank); ++i)
+    {
+        size_t index = groupOffsetPerProc.at(myRank) + i;
+        size_t size  = groupLimits.at(index).second
+                     - groupLimits.at(index).first + 1;
+        myGroups.push_back(make_pair(index, size));
+    }
+
+    // Allocate per-processor matrices for all groups.
+    for (auto g : myGroups)
+    {
+        P.push_back(Eigen::MatrixXd(g.second, g.second));
+        P.back().setIdentity();
+
+        X.push_back(Eigen::MatrixXd(g.second, sizeObservation));
+
+        // Prevent problems with unallocated K when log starts.
+        K.push_back(Eigen::MatrixXd(g.second, sizeObservation));
+        K.back().setZero();
+
+        sizeP += g.second * g.second;
+    }
+
+    MPI_Allreduce(MPI_IN_PLACE, &sizeP, 1, MPI_SIZE_T, MPI_SUM, comm);
+
+    return;
+}
+
 void KalmanFilter::setSizeObservation(size_t const size)
 {
     sizeObservation = size;
@@ -125,15 +243,32 @@ void KalmanFilter::update()
 
 void KalmanFilter::update(size_t const sizeObservation)
 {
-    X.resize(sizeState, sizeObservation);
+    MatrixXd A(sizeObservation, sizeObservation);
+    A.setZero();
 
-    // Calculate temporary result.
-    // X = P . H
-    X = P.selfadjointView<Lower>() * (*H);
+    for (size_t i = 0; i < myGroups.size(); ++i)
+    {
+        size_t const& j = myGroups.at(i).first;    // group index
+        size_t const& n = myGroups.at(i).second;   // group size
+        size_t const& s = groupLimits.at(j).first; // group weight start index
+
+        auto& x = X.at(i);
+        auto& p = P.at(i);
+
+        x.resize(n, sizeObservation);
+
+        // Calculate temporary result.
+        // X = P . H
+        x = p.selfadjointView<Lower>() * H->block(s, 0, n, sizeObservation);
+
+        // Calculate scaling matrix.
+        // A = H^T . X
+        // For decoupling, sum up the scaling matrix here (locally).
+        A += H->block(s, 0, n, sizeObservation).transpose() * x;
+    }
 
-    // Calculate scaling matrix.
-    // A = H^T . X
-    MatrixXd A = H->transpose() * X;
+    // Sum up scaling matrix globally.
+    MPI_Allreduce(MPI_IN_PLACE, A.data(), A.size(), MPI_DOUBLE, MPI_SUM, comm);
 
     // Increase learning rate.
     // eta(n) = eta(0) * exp(n * tau)
@@ -150,38 +285,52 @@ void KalmanFilter::update(size_t const sizeObservation)
         A.diagonal() += VectorXd::Constant(sizeObservation, lambda);
     }
 
-    // Calculate Kalman gain matrix.
-    // K = X . A^-1
-    K.resize(sizeState, sizeObservation);
-    K = X * A.inverse();
-
-    // Update error covariance matrix.
-    // P = P - K . X^T
-    P.noalias() -= K * X.transpose();
-
-    // Apply forgetting factor.
-    if (type == KT_FADINGMEMORY)
+    for (size_t i = 0; i < myGroups.size(); ++i)
     {
-        P *= 1.0 / lambda;
+        size_t const& j = myGroups.at(i).first;    // group index
+        size_t const& n = myGroups.at(i).second;   // group size
+        size_t const& s = groupLimits.at(j).first; // group weight start index
+
+        auto& x = X.at(i);
+        auto& p = P.at(i);
+        auto& k = K.at(i);
+
+        // Calculate Kalman gain matrix.
+        // K = X . A^-1
+        k.resize(n, sizeObservation);
+        k = x * A.inverse();
+
+        // Update error covariance matrix.
+        // P = P - K . X^T
+        p.noalias() -= k * x.transpose();
+
+        // Apply forgetting factor.
+        if (type == KT_FADINGMEMORY)
+        {
+            p *= 1.0 / lambda;
+        }
+        // Add process noise.
+        // P = P + Q
+        p.diagonal() += VectorXd::Constant(n, q);
+
+        // Update state vector.
+        // w = w + K . xi
+        w->segment(s, n) += k * (*xi);
+
+        // Anneal process noise.
+        // q(n) = q(0) * exp(-n * tau)
+        if (q > qmin) q *= exp(-qtau);
+
+        // Update forgetting factor.
+        if (type == KT_FADINGMEMORY)
+        {
+            lambda = nu * lambda + 1.0 - nu;
+            gamma = 1.0 / (1.0 + lambda / gamma);
+        }
     }
-    // Add process noise.
-    // P = P + Q
-    P.diagonal() += VectorXd::Constant(sizeState, q);
-
-    // Update state vector.
-    // w = w + K . xi
-    (*w) += K * (*xi);
-
-    // Anneal process noise.
-    // q(n) = q(0) * exp(-n * tau)
-    if (q > qmin) q *= exp(-qtau);
-
-    // Update forgetting factor.
-    if (type == KT_FADINGMEMORY)
-    {
-        lambda = nu * lambda + 1.0 - nu;
-        gamma = 1.0 / (1.0 + lambda / gamma);
-    }
+
+    // Communicate new weight segments.
+    MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DOUBLE, w->data(),
+                   sizeStatePerProc.data(), offsetStatePerProc.data(),
+                   MPI_DOUBLE, comm);
 
     numUpdates++;
@@ -206,7 +355,10 @@ void KalmanFilter::setParametersStandard(double const epsilon,
     q   = q0;
     eta = eta0;
 
-    P /= epsilon;
+    for (auto& p : P)
+    {
+        p /= epsilon;
+    }
 
     return;
 }
@@ -226,7 +378,10 @@ void KalmanFilter::setParametersFadingMemory(double const epsilon,
     this->nu      = nu;
 
     q = q0;
-    P /= epsilon;
+    for (auto& p : P)
+    {
+        p /= epsilon;
+    }
     gamma = 1.0;
 
     return;
@@ -235,12 +390,30 @@
 string KalmanFilter::status(size_t epoch) const
 {
-    double Pasym = 0.5 * (P - P.transpose()).array().abs().mean();
-    double Pdiag = P.diagonal().array().abs().sum();
-    double Poffdiag = (P.array().abs().sum() - Pdiag)
-                    / (sizeState * (sizeState - 1));
+    double Pasym    = 0.0;
+    double Pdiag    = 0.0;
+    double Poffdiag = 0.0;
+    double Kmean    = 0.0;
+
+    for (size_t i = 0; i < myGroups.size(); ++i)
+    {
+        auto const& p = P.at(i);
+        auto const& k = K.at(i);
+        Pasym += 0.5 * (p - p.transpose()).array().abs().sum();
+        double pdiag = p.diagonal().array().abs().sum();
+        Pdiag += pdiag;
+        Poffdiag += p.array().abs().sum() - pdiag;
+        Kmean += k.array().abs().sum();
+    }
+
+    MPI_Allreduce(MPI_IN_PLACE, &Pasym   , 1, MPI_DOUBLE, MPI_SUM, comm);
+    MPI_Allreduce(MPI_IN_PLACE, &Pdiag   , 1, MPI_DOUBLE, MPI_SUM, comm);
+    MPI_Allreduce(MPI_IN_PLACE, &Poffdiag, 1, MPI_DOUBLE, MPI_SUM, comm);
+    MPI_Allreduce(MPI_IN_PLACE, &Kmean   , 1, MPI_DOUBLE, MPI_SUM, comm);
+
+    Pasym /= (sizeP - sizeState);
     Pdiag /= sizeState;
-    double Kmean = K.array().abs().mean();
+    Poffdiag /= (sizeP - sizeState);
+    Kmean /= (sizeState * sizeObservation);
 
     string s = strpr("%10zu %10zu %16.8E %16.8E %16.8E %16.8E %16.8E",
                      epoch, numUpdates, Pdiag, Poffdiag, Pasym, Kmean, q);
@@ -314,11 +487,24 @@ vector<string> KalmanFilter::info() const
 {
     vector<string> v;
 
+    v.push_back("Extended Kalman Filter (EKF) optimizer:\n");
+    v.push_back(strpr("sizeState       = %zu\n", sizeState));
+    v.push_back(strpr("sizeObservation = %zu\n", sizeObservation));
+    v.push_back(strpr("myRank          = %d\n", myRank));
+    v.push_back(strpr("numProcs        = %d\n", numProcs));
+    v.push_back(strpr("OpenMP threads used                     : %d\n",
+                      nbThreads()));
+    v.push_back(strpr("Number of decoupling groups             : %zu\n",
+                      groupLimits.size()));
+    v.push_back(strpr("Number of decoupling groups of proc %3d : %zu\n",
+                      myRank, myGroups.size()));
+    v.push_back(strpr("P matrix size ratio (compared to GEKF)  : %6.2f %%\n",
+                      100.0 * sizeP / (sizeState * sizeState)));
+    v.push_back("-----------------------------------------"
+                "--------------------------------------\n");
     if (type == KT_STANDARD)
     {
         v.push_back(strpr("KalmanType::KT_STANDARD (%d)\n", type));
-        v.push_back(strpr("sizeState       = %zu\n", sizeState));
-        v.push_back(strpr("sizeObservation = %zu\n", sizeObservation));
         v.push_back(strpr("epsilon         = %12.4E\n", epsilon));
         v.push_back(strpr("q0              = %12.4E\n", q0     ));
         v.push_back(strpr("qtau            = %12.4E\n", qtau   ));
@@ -330,8 +516,6 @@ vector<string> KalmanFilter::info() const
     else if (type == KT_FADINGMEMORY)
     {
         v.push_back(strpr("KalmanType::KT_FADINGMEMORY (%d)\n", type));
-        v.push_back(strpr("sizeState       = %zu\n", sizeState));
-        v.push_back(strpr("sizeObservation = %zu\n", sizeObservation));
         v.push_back(strpr("epsilon         = %12.4E\n", epsilon));
         v.push_back(strpr("q0              = %12.4E\n", q0     ));
         v.push_back(strpr("qtau            = %12.4E\n", qtau   ));
@@ -339,7 +523,17 @@ vector<string> KalmanFilter::info() const
         v.push_back(strpr("lambda          = %12.4E\n", lambda));
         v.push_back(strpr("nu              = %12.4E\n", nu    ));
     }
-    v.push_back(strpr("OpenMP threads used: %d\n", nbThreads()));
+    //for (size_t i = 0; i < groupLimits.size(); ++i)
+    //{
+    //    v.push_back(strpr(" - group %5zu size: %zu\n",
+    //                      i,
+    //                      groupLimits.at(i).second
+    //                      - groupLimits.at(i).first + 1));
+    //}
+    //for (auto g : myGroups)
+    //{
+    //    v.push_back(strpr(" - group %5zu size: %zu\n", g.first, g.second));
+    //}
 
     return v;
 }
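Written out in matrix form, the group loops above implement, for every decoupling group g with covariance block P_g and the matching row block H_g of the derivative matrix H:

    X_g = P_g H_g
    A   = sum_g H_g^T X_g + Lambda
    K_g = X_g A^(-1)
    w_g <- w_g + K_g xi
    P_g <- P_g - K_g X_g^T + q I

with Lambda = (1/eta) I for KT_STANDARD and Lambda = lambda I for KT_FADINGMEMORY (where P_g is additionally rescaled by 1/lambda). Only the small sizeObservation x sizeObservation matrix A couples the groups; a single MPI_Allreduce on A therefore suffices before each task updates its own blocks, and the MPI_Allgatherv afterwards redistributes the updated state vector segments.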
"--------------------------------------\n"); if (type == KT_STANDARD) { v.push_back(strpr("KalmanType::KT_STANDARD (%d)\n", type)); - v.push_back(strpr("sizeState = %zu\n", sizeState)); - v.push_back(strpr("sizeObservation = %zu\n", sizeObservation)); v.push_back(strpr("epsilon = %12.4E\n", epsilon)); v.push_back(strpr("q0 = %12.4E\n", q0 )); v.push_back(strpr("qtau = %12.4E\n", qtau )); @@ -330,8 +516,6 @@ vector KalmanFilter::info() const else if (type == KT_FADINGMEMORY) { v.push_back(strpr("KalmanType::KT_FADINGMEMORY (%d)\n", type)); - v.push_back(strpr("sizeState = %zu\n", sizeState)); - v.push_back(strpr("sizeObservation = %zu\n", sizeObservation)); v.push_back(strpr("epsilon = %12.4E\n", epsilon)); v.push_back(strpr("q0 = %12.4E\n", q0 )); v.push_back(strpr("qtau = %12.4E\n", qtau )); @@ -339,7 +523,17 @@ vector KalmanFilter::info() const v.push_back(strpr("lambda = %12.4E\n", lambda)); v.push_back(strpr("nu = %12.4E\n", nu )); } - v.push_back(strpr("OpenMP threads used: %d\n", nbThreads())); + //for (size_t i = 0; i < groupLimits.size(); ++i) + //{ + // v.push_back(strpr(" - group %5zu size: %zu\n", + // i, + // groupLimits.at(i).second + // - groupLimits.at(i).first + 1)); + //} + //for (auto g : myGroups) + //{ + // v.push_back(strpr(" - group %5zu size: %zu\n", g.first, g.second)); + //} return v; } diff --git a/src/libnnptrain/KalmanFilter.h b/src/libnnptrain/KalmanFilter.h index 356cee949..0d409ac1c 100644 --- a/src/libnnptrain/KalmanFilter.h +++ b/src/libnnptrain/KalmanFilter.h @@ -22,6 +22,7 @@ #include #include #include +#include #include namespace nnp @@ -50,6 +51,19 @@ class KalmanFilter : public Updater /** Destructor. */ virtual ~KalmanFilter(); + /** Basic MPI setup. + * + * @param[in] communicator Input communicator of calling program. + */ + void setupMPI(MPI_Comm* communicator); + /** Set up Kalman filter decoupling. + * + * @param[in] groupLimits Vector containing starting and ending indices of + * each decoupling group. + */ + void setupDecoupling( + std::vector> groupLimits); /** Set observation vector size. * * @param[in] size Size of the observation vector. @@ -194,10 +208,16 @@ class KalmanFilter : public Updater private: /// Kalman filter type. KalmanType type; + /// My process ID. + int myRank; + /// Total number of MPI processors. + int numProcs; /// Size of observation (measurement) vector. std::size_t sizeObservation; /// Total number of updates performed. std::size_t numUpdates; + /// Number of covariance matrix entries in memory (sum over all groups). + std::size_t sizeP; /// Error covariance initialization parameter @f$\epsilon@f$. double epsilon; /// Process noise @f$q@f$. @@ -222,18 +242,34 @@ class KalmanFilter : public Updater double nu; /// Forgetting gain factor gamma for fading memory Kalman filter. double gamma; + /// Size of state vector segment handled by each processor. + std::vector sizeStatePerProc; + /// Offset of state vector segment handled by each processor. + std::vector offsetStatePerProc; + /// List of (group indices, group sizes) of this processor. + std::vector> myGroups; + /// Number of groups for each processor. + std::vector numGroupsPerProc; + /// Offset in terms of groups for each processor. + std::vector groupOffsetPerProc; + /// Decoupling group limits, i.e. starting end ending indices. + std::vector> groupLimits; /// State vector. Eigen::Map* w; /// Error vector. Eigen::Map* xi; /// Derivative matrix. Eigen::Map* H; - /// Error covariance matrix. - Eigen::MatrixXd P; + /// Error covariance matrices for all groups. 
diff --git a/src/libnnptrain/Training.cpp b/src/libnnptrain/Training.cpp
index ba293c7d0..94de5b470 100644
--- a/src/libnnptrain/Training.cpp
+++ b/src/libnnptrain/Training.cpp
@@ -40,6 +40,7 @@ Training::Training() : Dataset(),
                        jacobianMode  (JM_SUM      ),
                        updateStrategy(US_COMBINED ),
                        selectionMode (SM_RANDOM   ),
+                       decouplingType(DT_GLOBAL   ),
                        hasUpdaters   (false       ),
                        hasStructures (false       ),
                        useForces     (false       ),
@@ -300,7 +301,13 @@ void Training::initializeWeights()
                 w.at(i).at(j) = minWeights + gsl_rng_uniform(rngGlobal)
                               * (maxWeights - minWeights);
             }
-            elements.at(i).neuralNetwork->setConnections(&(w.at(i).front()));
+#ifndef ALTERNATIVE_WEIGHT_ORDERING
+            // To be consistent this should actually be the non-AO version,
+            // but this way results stay comparable.
+            elements.at(i).neuralNetwork->setConnectionsAO(&(w.at(i).front()));
+#else
+            elements.at(i).neuralNetwork->setConnectionsAO(&(w.at(i).front()));
+#endif
         }
         if (settings.keywordExists("nguyen_widrow_weights_short"))
         {
@@ -991,6 +998,44 @@ void Training::setupTraining()
     {
         kalmanType = (KalmanFilter::KalmanType)
                      atoi(settings["kalman_type"].c_str());
+        decouplingType = (DecouplingType)
+                         atoi(settings["decoupling_type"].c_str());
+        if (decouplingType == DT_GLOBAL)
+        {
+            log << strpr("No decoupling (GEKF) selected: DT_GLOBAL (%d).\n",
+                         decouplingType);
+        }
+        else if (decouplingType == DT_ELEMENT)
+        {
+            log << strpr("Per-element decoupling (ED-GEKF) selected: "
+                         "DT_ELEMENT (%d).\n", decouplingType);
+        }
+        else if (decouplingType == DT_LAYER)
+        {
+            log << strpr("Per-layer decoupling selected: "
+                         "DT_LAYER (%d).\n", decouplingType);
+        }
+        else if (decouplingType == DT_NODE)
+        {
+            log << strpr("Per-node decoupling (NDEKF) selected: "
+                         "DT_NODE (%d).\n", decouplingType);
+        }
+        else if (decouplingType == DT_FULL)
+        {
+            log << strpr("Full (per-weight) decoupling selected: "
+                         "DT_FULL (%d).\n", decouplingType);
+        }
+        else
+        {
+            throw runtime_error("ERROR: Unknown Kalman filter decoupling "
+                                "type.\n");
+        }
+        if (decouplingType != DT_GLOBAL && updateStrategy != US_COMBINED)
+        {
+            throw runtime_error(strpr("ERROR: Kalman filter decoupling works "
+                                      "only in conjunction with "
+                                      "update_strategy %d.\n", US_COMBINED));
+        }
     }
 
     for (size_t i = 0; i < numUpdaters; ++i)
@@ -1008,6 +1053,81 @@ void Training::setupTraining()
             updaters.push_back(
                 (Updater*)new KalmanFilter(numWeightsPerUpdater.at(i),
                                            kalmanType));
+            KalmanFilter* u = dynamic_cast<KalmanFilter*>(updaters.back());
+            if (parallelMode == PM_TRAIN_ALL)
+            {
+                u->setupMPI(&comm);
+            }
+            else
+            {
+                MPI_Group groupWorld;
+                MPI_Comm_group(comm, &groupWorld);
+                int const size0 = 1;
+                int const rank0[1] = {0};
+                MPI_Group groupRank0;
+                MPI_Group_incl(groupWorld, size0, rank0, &groupRank0);
+                MPI_Comm commRank0;
+                MPI_Comm_create_group(comm, groupRank0, 0, &commRank0);
+                u->setupMPI(&commRank0);
+            }
+            vector<pair<size_t, size_t>> limits;
+            if (decouplingType == DT_GLOBAL)
+            {
+                limits.push_back(
+                    make_pair(0, numWeightsPerUpdater.at(i) - 1));
+            }
+            else if (decouplingType == DT_ELEMENT)
+            {
+                for (size_t j = 0; j < numElements; ++j)
+                {
+                    limits.push_back(make_pair(
+                        weightsOffset.at(j),
+                        weightsOffset.at(j) + elements.at(j).
+                        neuralNetwork->getNumConnections() - 1));
+                }
+            }
+            else if (decouplingType == DT_LAYER)
+            {
+                for (size_t j = 0; j < numElements; ++j)
+                {
+                    for (auto l : elements.at(j).neuralNetwork
+                                  ->getLayerLimits())
+                    {
+                        limits.push_back(make_pair(
+                            weightsOffset.at(j) + l.first,
+                            weightsOffset.at(j) + l.second));
+                    }
+                }
+            }
+            else if (decouplingType == DT_NODE)
+            {
+#ifndef ALTERNATIVE_WEIGHT_ORDERING
+                for (size_t j = 0; j < numElements; ++j)
+                {
+                    for (auto l : elements.at(j).neuralNetwork
+                                  ->getNeuronLimits())
+                    {
+                        limits.push_back(make_pair(
+                            weightsOffset.at(j) + l.first,
+                            weightsOffset.at(j) + l.second));
+                    }
+                }
+#else
+                throw runtime_error("ERROR: Node-decoupled Kalman filter "
+                                    "(NDEKF) training not possible with "
+                                    "alternative (old) weight memory "
+                                    "layout, recompile without "
+                                    "-DALTERNATIVE_WEIGHT_ORDERING.\n");
+#endif
+            }
+            else if (decouplingType == DT_FULL)
+            {
+                for (size_t j = 0; j < numWeightsPerUpdater.at(i); ++j)
+                {
+                    limits.push_back(make_pair(j, j));
+                }
+            }
+            u->setupDecoupling(limits);
         }
         updaters.back()->setState(&(weights.at(i).front()));
     }
@@ -1432,7 +1552,7 @@ void Training::calculateErrorEpoch()
         fileNameForcesTest = strpr("testforces.%06zu.out", epoch);
     }
 
-    // Calculate RMSE and write comparison files.
+    // Calculate error and write comparison files.
     calculateError(true,
                    identifier,
                    fileNameEnergiesTrain,
@@ -1452,7 +1572,9 @@ void Training::writeWeights(string const fileNameFormat) const
         string fileName = strpr(fileNameFormat.c_str(),
                                 elements.at(i).getAtomicNumber());
        file.open(fileName.c_str());
-        elements.at(i).neuralNetwork->writeConnections(file);
+        // Attention: need alternative (old) ordering scheme here for
+        // backward compatibility!
+        elements.at(i).neuralNetwork->writeConnectionsAO(file);
         file.close();
     }
@@ -1629,7 +1751,7 @@ void Training::writeNeuronStatistics(string const fileName) const
     vector<string> colInfo;
     vector<size_t> colSize;
     title.push_back("Statistics for individual neurons gathered during "
-                    "RMSE calculation.");
+                    "error calculation.");
     colSize.push_back(10);
     colName.push_back("element");
     colInfo.push_back("Element index.");
@@ -1743,24 +1865,31 @@ void Training::writeUpdaterStatus(bool append,
     for (size_t i = 0; i < numUpdaters; ++i)
     {
-        string fileName;
-        if (updateStrategy == US_COMBINED)
-        {
-            fileName = strpr(fileNameFormat.c_str(), 0);
-        }
-        else if (updateStrategy == US_ELEMENT)
+        if (myRank == 0)
         {
-            fileName = strpr(fileNameFormat.c_str(),
-                             elementMap.atomicNumber(i));
+            string fileName;
+            if (updateStrategy == US_COMBINED)
+            {
+                fileName = strpr(fileNameFormat.c_str(), 0);
+            }
+            else if (updateStrategy == US_ELEMENT)
+            {
+                fileName = strpr(fileNameFormat.c_str(),
+                                 elementMap.atomicNumber(i));
+            }
+            if (append) file.open(fileName.c_str(), ofstream::app);
+            else
+            {
+                file.open(fileName.c_str());
+                appendLinesToFile(file, updaters.at(i)->statusHeader());
+            }
+            file << updaters.at(i)->status(epoch);
+            file.close();
        }
-        if (append) file.open(fileName.c_str(), ofstream::app);
-        else
+        else if (parallelMode == PM_TRAIN_ALL)
         {
-            file.open(fileName.c_str());
-            appendLinesToFile(file, updaters.at(i)->statusHeader());
+            updaters.at(i)->status(epoch);
         }
-        file << updaters.at(i)->status(epoch);
-        file.close();
     }
 
     return;
 }
@@ -1884,7 +2013,7 @@ void Training::loop()
     else if (errorMetricForces == 1) metric = "MAE";
     if (errorMetricEnergies % 2 == 0) peratom = "per atom ";
 
-    log << "The training loop output covers different RMSEs, update and\n";
+    log << "The training loop output covers different errors, update and\n";
     log << "timing information. The following quantities are organized\n";
     log << "according to the matrix scheme below:\n";
     log << "-------------------------------------------------------------------\n";
@@ -1911,7 +2040,7 @@ void Training::loop()
     log << "Abbreviations:\n";
     log << "    p. u. = physical units.\n";
     log << "    i. u. = internal units.\n";
-    log << "Note: RMSEs in internal units (columns 5 + 6) are only present \n";
+    log << "Note: Errors in internal units (columns 5 + 6) are only present \n";
     log << "      if data set normalization is used.\n";
     log << "-------------------------------------------------------------------\n";
     log << "       1     2         3         4         5         6\n";
@@ -1940,8 +2069,9 @@ void Training::loop()
     // Write learning curve.
     if (myRank == 0) writeLearningCurve(false);
 
-    // Write updater status to file.
-    if (myRank == 0) writeUpdaterStatus(false);
+    // Write updater status to file (decoupled KF requires all tasks to
+    // execute the status() function).
+    writeUpdaterStatus(false);
 
     // Write neuron statistics.
     writeNeuronStatisticsEpoch();
@@ -2013,7 +2143,7 @@ void Training::loop()
         if (myRank == 0) writeLearningCurve(true);
 
         // Write updater status to file.
-        if (myRank == 0) writeUpdaterStatus(true);
+        writeUpdaterStatus(true);
 
         // Write neuron statistics.
         writeNeuronStatisticsEpoch();
@@ -2280,6 +2410,22 @@ void Training::update(bool force)
                 else
                 {
                     nn->calculateDEdc(&(dXdc.at(i).front()));
+                    //vector<double> temp(dXdc.at(i).size(), 0.0);
+                    //vector<double> tempAO(dXdc.at(i).size(), 0.0);
+                    //nn->calculateDEdc(&(temp.front()));
+                    //ofstream tf("dedc.out");
+                    //size_t count;
+                    //count = 0;
+                    //sort(temp.begin(), temp.end());
+                    //for (auto i : temp) tf << strpr("%zu %16.8E\n", count++, i);
+                    //tf.close();
+                    //nn->calculateDEdcAO(&(tempAO.front()));
+                    //tf.open("dedc.out.ao");
+                    //count = 0;
+                    //sort(tempAO.begin(), tempAO.end());
+                    //for (auto i : tempAO) tf << strpr("%zu %16.8E\n", count++, i);
+                    //tf.close();
+                    //throw runtime_error("END HERE");
                 }
                 // Finally sum up Jacobian.
                 if (updateStrategy == US_ELEMENT) iu = i;
@@ -2715,7 +2861,11 @@ void Training::getWeights()
         for (size_t i = 0; i < numElements; ++i)
        {
             NeuralNetwork const* const& nn = elements.at(i).neuralNetwork;
+#ifndef ALTERNATIVE_WEIGHT_ORDERING
             nn->getConnections(&(weights.at(0).at(pos)));
+#else
+            nn->getConnectionsAO(&(weights.at(0).at(pos)));
+#endif
             pos += nn->getNumConnections();
         }
     }
@@ -2724,7 +2874,11 @@ void Training::getWeights()
         for (size_t i = 0; i < numElements; ++i)
         {
             NeuralNetwork const* const& nn = elements.at(i).neuralNetwork;
+#ifndef ALTERNATIVE_WEIGHT_ORDERING
             nn->getConnections(&(weights.at(i).front()));
+#else
+            nn->getConnectionsAO(&(weights.at(i).front()));
+#endif
         }
     }
@@ -2739,7 +2893,11 @@ void Training::setWeights()
         for (size_t i = 0; i < numElements; ++i)
         {
             NeuralNetwork* const& nn = elements.at(i).neuralNetwork;
+#ifndef ALTERNATIVE_WEIGHT_ORDERING
             nn->setConnections(&(weights.at(0).at(pos)));
+#else
+            nn->setConnectionsAO(&(weights.at(0).at(pos)));
+#endif
             pos += nn->getNumConnections();
         }
     }
@@ -2748,7 +2906,11 @@ void Training::setWeights()
         for (size_t i = 0; i < numElements; ++i)
         {
             NeuralNetwork* const& nn = elements.at(i).neuralNetwork;
+#ifndef ALTERNATIVE_WEIGHT_ORDERING
             nn->setConnections(&(weights.at(i).front()));
+#else
+            nn->setConnectionsAO(&(weights.at(i).front()));
+#endif
         }
     }
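For illustration, the DT_ELEMENT branch above builds one decoupling group per element network; the following standalone sketch (hypothetical connection counts) reproduces that limit construction:

    #include <cstdio>
    #include <utility>
    #include <vector>

    int main()
    {
        // Hypothetical sizes: two element networks with 100 and 120
        // connections concatenated into one combined state vector.
        std::vector<std::size_t> numConnections = {100, 120};
        std::vector<std::pair<std::size_t, std::size_t>> limits;

        std::size_t offset = 0;
        for (std::size_t n : numConnections)
        {
            limits.push_back(std::make_pair(offset, offset + n - 1));
            offset += n;
        }
        // Prints "(0,99) (100,219)": each element NN becomes one group,
        // i.e. one diagonal block of the covariance matrix P.
        for (auto const& l : limits)
        {
            std::printf("(%zu,%zu) ", l.first, l.second);
        }
        std::printf("\n");
        return 0;
    }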
diff --git a/src/libnnptrain/Training.h b/src/libnnptrain/Training.h
index 1ae678255..93022ebc8 100644
--- a/src/libnnptrain/Training.h
+++ b/src/libnnptrain/Training.h
@@ -110,6 +110,21 @@ class Training : public Dataset
         SM_THRESHOLD
     };
 
+    /// Kalman filter decoupling type (requires UT_KF and US_COMBINED).
+    enum DecouplingType
+    {
+        /// No decoupling, global extended Kalman filter (GEKF).
+        DT_GLOBAL,
+        /// Per-element decoupling, ED-GEKF (doi.org/10.1021/acs.jctc.5b00211).
+        DT_ELEMENT,
+        /// Layer decoupling.
+        DT_LAYER,
+        /// Node decoupling, NDEKF.
+        DT_NODE,
+        /// Full decoupling, FDEKF.
+        DT_FULL
+    };
+
     /// Constructor.
     Training();
     /// Destructor, updater vector needs to be cleaned.
@@ -309,6 +324,8 @@ class Training : public Dataset
     UpdateStrategy  updateStrategy;
     /// Selection mode for update candidates.
     SelectionMode   selectionMode;
+    /// Kalman filter decoupling type if KF training selected.
+    DecouplingType  decouplingType;
     /// If this rank performs weight updates.
     bool            hasUpdaters;
     /// If this rank holds structure information.
diff --git a/src/makefile.gnu b/src/makefile.gnu
index 223a03485..ece0147ce 100644
--- a/src/makefile.gnu
+++ b/src/makefile.gnu
@@ -56,3 +56,6 @@ PROJECT_OPTIONS+= -DEIGEN_DONT_PARALLELIZE
 
 # Use improved memory layout for symmetry function derivatives.
 PROJECT_OPTIONS+= -DIMPROVED_SFD_MEMORY
+
+# Use alternative (old) ordering scheme of weights (NDEKF not possible).
+#PROJECT_OPTIONS+= -DALTERNATIVE_WEIGHT_ORDERING
diff --git a/src/makefile.intel b/src/makefile.intel
index 17e61b7a0..4a9a0e94c 100644
--- a/src/makefile.intel
+++ b/src/makefile.intel
@@ -57,3 +57,6 @@ PROJECT_OPTIONS+= -DEIGEN_DONT_PARALLELIZE
 
 # Use improved memory layout for symmetry function derivatives.
 PROJECT_OPTIONS+= -DIMPROVED_SFD_MEMORY
+
+# Use alternative (old) ordering scheme of weights (NDEKF not possible).
+#PROJECT_OPTIONS+= -DALTERNATIVE_WEIGHT_ORDERING
diff --git a/src/makefile.llvm b/src/makefile.llvm
index b748ce08b..30deb9a03 100644
--- a/src/makefile.llvm
+++ b/src/makefile.llvm
@@ -58,3 +58,6 @@ PROJECT_OPTIONS+= -DEIGEN_DONT_PARALLELIZE
 
 # Use improved memory layout for symmetry function derivatives.
 PROJECT_OPTIONS+= -DIMPROVED_SFD_MEMORY
+
+# Use alternative (old) ordering scheme of weights (NDEKF not possible).
+#PROJECT_OPTIONS+= -DALTERNATIVE_WEIGHT_ORDERING
diff --git a/test/cpp/Example_nnp_train.h b/test/cpp/Example_nnp_train.h
index fe4aa3d22..88ed24a76 100644
--- a/test/cpp/Example_nnp_train.h
+++ b/test/cpp/Example_nnp_train.h
@@ -28,13 +28,13 @@ void BoostDataContainer<Example_nnp_train>::setup()
     e->rmseForcesTrain = 3.77302629E-02;
     e->rmseForcesTest  = 2.25135617E-01;
 
-    //examples.push_back(Example_nnp_train("H2O_RPBE-D3"));
-    //e = &(examples.back());
-    //e->lastEpoch = 10;
-    //e->rmseEnergyTrain = 1.02312914E-04;
-    //e->rmseEnergyTest  = 5.28837597E-04;
-    //e->rmseForcesTrain = 1.67069652E-02;
-    //e->rmseForcesTest  = 1.07708383E-02;
+    examples.push_back(Example_nnp_train("H2O_RPBE-D3"));
+    e = &(examples.back());
+    e->lastEpoch = 5;
+    e->rmseEnergyTrain = 9.19395999E-04;
+    e->rmseEnergyTest  = 7.42520683E-04;
+    e->rmseForcesTrain = 2.40682415E-02;
+    e->rmseForcesTest  = 1.44776060E-02;
 
     return;
 }