LCOV - code coverage report
Current view: top level - annfunc - ANN.cpp (source / functions)
Test: plumed test coverage
Date: 2025-11-25 13:55:50

                 Hit    Total    Coverage
Lines:           134    138     97.1 %
Functions:       5      6       83.3 %

          Line data    Source code
       1             : /* +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
       2             : MIT License
       3             : 
       4             : Copyright (c) 2019 Wei Chen and Andrew Ferguson
       5             : 
       6             : Permission is hereby granted, free of charge, to any person obtaining a copy
       7             : of this software and associated documentation files (the "Software"), to deal
       8             : in the Software without restriction, including without limitation the rights
       9             : to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
      10             : copies of the Software, and to permit persons to whom the Software is
      11             : furnished to do so, subject to the following conditions:
      12             : 
      13             : The above copyright notice and this permission notice shall be included in all
      14             : copies or substantial portions of the Software.
      15             : 
      16             : THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
      17             : IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
      18             : FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
      19             : AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
      20             : LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
      21             : OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
      22             : SOFTWARE.
      23             : +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */
      24             : #include "function/Function.h"
      25             : #include "core/ActionRegister.h"
       26             : #include <cassert>
      27             : 
      28             : #include <string>
      29             : #include <cmath>
      30             : #include <iostream>
      31             : // #include <stdio.h>
      32             : 
      33             : using namespace std;
      34             : 
      35             : // #define DEBUG
      36             : // #define DEBUG_2
      37             : // #define DEBUG_3
      38             : 
      39             : namespace PLMD {
      40             : namespace function {
      41             : namespace annfunc {
      42             : 
      43             : //+PLUMEDOC ANNMOD_Function ANN
      44             : /*
      45             : Calculates the ANN function, i.e. the outputs of a feedforward neural network.
      46             : 
      47             : This module implements the ANN class, a subclass of the Function class.
      48             : The ANN class evaluates a fully-connected feedforward neural network with the
      49             : specified weights, biases, and activation functions, and generates the corresponding outputs.
      50             : The ANN outputs can be used as collective variables, as inputs to other collective variables,
      51             : or as inputs for data analysis tools.
      52             : 
      53             : \par Examples
      54             : 
      55             : Assume we have an ANN whose numbers of nodes per layer are [2, 3, 1]. The weights connecting layers 0 and 1 are
      56             : 
      57             : [[1,2], [3,4], [5,6]]
      58             : 
      59             : the weights connecting layers 1 and 2 are
      60             : 
      61             : [[7,8,9]]
      62             : 
      63             : The biases for layers 1 and 2 are [10, 11, 12] and [13], respectively.
      64             : 
      65             : All activation functions are Tanh.
      66             : 
      67             : Then, if the input variables are l_0_out_0 and l_0_out_1, the corresponding ANN function object can be defined using the
      68             : following PLUMED script:
      69             : 
      70             : \plumedfile
      71             : ANN ...
      72             : LABEL=ann
      73             : ARG=l_0_out_0,l_0_out_1
      74             : NUM_LAYERS=3
      75             : NUM_NODES=2,3,1
      76             : ACTIVATIONS=Tanh,Tanh
      77             : WEIGHTS0=1,2,3,4,5,6
      78             : WEIGHTS1=7,8,9
      79             : BIASES0=10,11,12
      80             : BIASES1=13
      81             : ... ANN
      82             : \endplumedfile
      83             : 
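                     : With these parameters the single output component is
                     : 
                     :   node-0 = Tanh( W1 . Tanh( W0 . x + b0 ) + b1 ),   x = (l_0_out_0, l_0_out_1)
                     : 
                     : For inputs of order one the large biases saturate the hidden Tanh units, so
                     : node-0 is approximately Tanh(7 + 8 + 9 + 13) = Tanh(37), i.e. very close to 1.
                     : 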
      84             : To access its components, we use "ann.node-0", "ann.node-1", ..., which represent the components of the neural network outputs.
      85             : 
      86             : 
      87             : */
      88             : //+ENDPLUMEDOC
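                     : 
                     : // A minimal standalone sketch (not part of this action) that reproduces the
                     : // forward pass of the documented [2, 3, 1] example by hand, as a sanity check
                     : // of the weight/bias layout described above:
                     : //
                     : //   #include <cmath>
                     : //   #include <cstdio>
                     : //   int main() {
                     : //     double x[2] = {0.1, 0.2};                         // example inputs
                     : //     double W0[3][2] = {{1, 2}, {3, 4}, {5, 6}}, b0[3] = {10, 11, 12};
                     : //     double W1[3] = {7, 8, 9}, b1 = 13;
                     : //     double z = b1;
                     : //     for (int j = 0; j < 3; j++) {                     // hidden layer (Tanh)
                     : //       double h = std::tanh(W0[j][0] * x[0] + W0[j][1] * x[1] + b0[j]);
                     : //       z += W1[j] * h;                                 // input of the output layer
                     : //     }
                     : //     std::printf("node-0 = %f\n", std::tanh(z));       // ~1: the biases saturate Tanh
                     : //     return 0;
                     : //   }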
      89             : 
      90             : class ANN : public Function {
      91             : private:
      92             :   int num_layers;
      93             :   vector<int> num_nodes;
      94             :   vector<string> activations;   // activation functions
      95             :   vector<vector<double> > weights;  // flattened weight arrays
      96             :   vector<vector<double> > biases;
      97             :   vector<vector<double> > output_of_each_layer;
      98             :   vector<vector<double> > input_of_each_layer;
      99             :   vector<double** > coeff;  // weight matrix arrays, reshaped from "weights"
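                     :   // coeff[l] has num_nodes[l+1] rows and num_nodes[l] columns; the row arrays
                     :   // are allocated with new[] in the constructor and are never freed in this file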
     100             : 
     101             : public:
     102             :   static void registerKeywords( Keywords& keys );
     103             :   explicit ANN(const ActionOptions&);
     104             :   virtual void calculate();
     105             :   void calculate_output_of_each_layer(const vector<double>& input);
     106             :   void back_prop(vector<vector<double> >& derivatives_of_each_layer, int index_of_output_component);
     107             : };
     108             : 
     109             : PLUMED_REGISTER_ACTION(ANN,"ANN")
     110             : 
     111           7 : void ANN::registerKeywords( Keywords& keys ) {
     112           7 :   Function::registerKeywords(keys);
     113           7 :   keys.use("ARG");
     114           7 :   keys.use("PERIODIC");
     115          14 :   keys.add("compulsory", "NUM_LAYERS", "number of layers of the neural network");
     116          14 :   keys.add("compulsory", "NUM_NODES", "numbers of nodes in each layer of the neural network");
     117          14 :   keys.add("compulsory", "ACTIVATIONS", "activation functions for the neural network");
     118          14 :   keys.add("numbered", "WEIGHTS", "flattened weight arrays connecting adjacent layers, "
     119             :            "WEIGHTS0 represents flattened weight array connecting layer 0 and layer 1, "
     120             :            "WEIGHTS1 represents flattened weight array connecting layer 1 and layer 2, ...");
     121          14 :   keys.add("numbered", "BIASES", "bias array for each layer of the neural network, "
     122             :            "BIASES0 represents bias array for layer 1, BIASES1 represents bias array for layer 2, ...");
      123             :   // since v2.2 PLUMED requires all components to be registered
     124          14 :   keys.addOutputComponent("node", "default", "components of ANN outputs");
     125           7 : }
     126             : 
     127           5 : ANN::ANN(const ActionOptions&ao):
     128             :   Action(ao),
     129           5 :   Function(ao) {
     130           5 :   parse("NUM_LAYERS", num_layers);
     131           5 :   num_nodes = vector<int>(num_layers);
     132          10 :   activations = vector<string>(num_layers - 1);
     133          10 :   output_of_each_layer = vector<vector<double> >(num_layers);
     134          10 :   input_of_each_layer = vector<vector<double> >(num_layers);
     135           5 :   coeff = vector<double** >(num_layers - 1);
     136           5 :   parseVector("NUM_NODES", num_nodes);
     137           5 :   parseVector("ACTIVATIONS", activations);
     138           5 :   log.printf("\nactivations = ");
     139          15 :   for (const auto & ss: activations) {
     140          10 :     log.printf("%s, ", ss.c_str());
     141             :   }
     142           5 :   log.printf("\nnum_nodes = ");
     143          20 :   for (auto ss: num_nodes) {
     144          15 :     log.printf("%d, ", ss);
     145             :   }
     146             :   vector<double> temp_single_coeff, temp_single_bias;
     147          10 :   for (int ii = 0; ; ii ++) {
     148             :     // parse coeff
      149          30 :     if( !parseNumberedVector("WEIGHTS", ii, temp_single_coeff) ) {
      150           5 :       // no WEIGHTS keyword with this index: all weight arrays have been read
      151             :       break;
      152             :     }
     153          10 :     weights.push_back(temp_single_coeff);
     154          10 :     log.printf("size of temp_single_coeff = %lu\n", temp_single_coeff.size());
     155          10 :     log.printf("size of weights = %lu\n", weights.size());
     156             :     // parse bias
      157          20 :     if( !parseNumberedVector("BIASES", ii, temp_single_bias) ) {
      158           0 :       temp_single_bias=biases[ii-1];  // fallback: reuse the previous bias array (not exercised by the tests)
      159             :     }
     160          10 :     biases.push_back(temp_single_bias);
     161          10 :     log.printf("size of temp_single_bias = %lu\n", temp_single_bias.size());
     162          10 :     log.printf("size of biases = %lu\n", biases.size());
     163             :   }
     164             : 
     165           5 :   if(getNumberOfArguments() != num_nodes[0]) {
      166           0 :     error("The number of arguments does not match the first entry of NUM_NODES");
     167             :   }
     168             : 
     169           5 :   auto temp_coeff = weights;
     170          15 :   for (int ii = 0; ii < num_layers - 1; ii ++) {
     171             :     int num_of_rows, num_of_cols; // num of rows/cols for the coeff matrix of this connection
     172          10 :     num_of_rows = num_nodes[ii + 1];
     173          10 :     num_of_cols = num_nodes[ii];
     174             :     assert (num_of_rows * num_of_cols == temp_coeff[ii].size()); // check whether the size matches
     175             :     // create a 2d array to hold coefficients
     176          10 :     coeff[ii] = new double*[num_of_rows];
     177          32 :     for (int kk = 0; kk < num_of_rows; kk ++) {
     178          22 :       coeff[ii][kk] = new double[num_of_cols];
     179             :     }
     180          61 :     for (int jj = 0; jj < temp_coeff[ii].size(); jj ++) {
     181          51 :       coeff[ii][jj / num_of_cols][jj % num_of_cols] = temp_coeff[ii][jj];
     182             :     }
     183             :   }
     184             :   // check coeff
     185          15 :   for (int ii = 0; ii < num_layers - 1; ii ++) {
     186          10 :     log.printf("coeff %d = \n", ii);
     187          32 :     for (int jj = 0; jj < num_nodes[ii + 1]; jj ++) {
     188          73 :       for (int kk = 0; kk < num_nodes[ii]; kk ++) {
     189          51 :         log.printf("%f ", coeff[ii][jj][kk]);
     190             :       }
     191          22 :       log.printf("\n");
     192             :     }
     193             :   }
     194             :   // check bias
     195          15 :   for (int ii = 0; ii < num_layers - 1; ii ++) {
     196          10 :     log.printf("bias %d = \n", ii);
     197          32 :     for (int jj = 0; jj < num_nodes[ii + 1]; jj ++) {
     198          22 :       log.printf("%f ", biases[ii][jj]);
     199             :     }
     200          10 :     log.printf("\n");
     201             :   }
     202           5 :   log.printf("initialization ended\n");
     203             :   // create components
     204          12 :   for (int ii = 0; ii < num_nodes[num_layers - 1]; ii ++) {
     205           7 :     string name_of_this_component = "node-" + to_string(ii);
     206           7 :     addComponentWithDerivatives(name_of_this_component);
     207           7 :     componentIsNotPeriodic(name_of_this_component);
     208             :   }
     209           5 :   checkRead();
     210          10 : }
     211             : 
     212        1638 : void ANN::calculate_output_of_each_layer(const vector<double>& input) {
     213             :   // first layer
     214        1638 :   output_of_each_layer[0] = input;
     215             :   // following layers
     216        4914 :   for(int ii = 1; ii < num_nodes.size(); ii ++) {
     217        3276 :     output_of_each_layer[ii].resize(num_nodes[ii]);
     218        3276 :     input_of_each_layer[ii].resize(num_nodes[ii]);
     219             :     // first calculate input
     220       10374 :     for (int jj = 0; jj < num_nodes[ii]; jj ++) {
     221        7098 :       input_of_each_layer[ii][jj] = biases[ii - 1][jj];  // add bias term
     222       23478 :       for (int kk = 0; kk < num_nodes[ii - 1]; kk ++) {
     223       16380 :         input_of_each_layer[ii][jj] += coeff[ii - 1][jj][kk] * output_of_each_layer[ii - 1][kk];
     224             :       }
     225             :     }
     226             :     // then get output
     227        3276 :     if (activations[ii - 1] == string("Linear")) {
     228        1092 :       for(int jj = 0; jj < num_nodes[ii]; jj ++) {
     229         546 :         output_of_each_layer[ii][jj] = input_of_each_layer[ii][jj];
     230             :       }
     231        2730 :     } else if (activations[ii - 1] == string("Tanh")) {
     232        7644 :       for(int jj = 0; jj < num_nodes[ii]; jj ++) {
     233        5460 :         output_of_each_layer[ii][jj] = tanh(input_of_each_layer[ii][jj]);
     234             :       }
     235         546 :     } else if (activations[ii - 1] == string("Circular")) {
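                     :       // each consecutive input pair (x_p, x_q) is projected onto the unit
                     :       // circle: the outputs are x_p / r and x_q / r with r = sqrt(x_p^2 + x_q^2)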
     236             :       assert (num_nodes[ii] % 2 == 0);
     237        1092 :       for(int jj = 0; jj < num_nodes[ii] / 2; jj ++) {
     238         546 :         double radius = sqrt(input_of_each_layer[ii][2 * jj] * input_of_each_layer[ii][2 * jj]
     239         546 :                              +input_of_each_layer[ii][2 * jj + 1] * input_of_each_layer[ii][2 * jj + 1]);
     240         546 :         output_of_each_layer[ii][2 * jj] = input_of_each_layer[ii][2 * jj] / radius;
     241         546 :         output_of_each_layer[ii][2 * jj + 1] = input_of_each_layer[ii][2 * jj + 1] / radius;
     242             : 
     243             :       }
     244             :     } else {
      245             :       printf("activation function type not found!\n\n");
     246           0 :       return;
     247             :     }
     248             :   }
     249             : #ifdef DEBUG_2
     250             :   // print out the result for debugging
     251             :   printf("output_of_each_layer = \n");
     252             :   // for (int ii = num_layers - 1; ii < num_layers; ii ++) {
     253             :   for (int ii = 0; ii < num_layers; ii ++) {
     254             :     printf("layer[%d]: ", ii);
     255             :     if (ii != 0) {
     256             :       cout << activations[ii - 1] << "\t";
     257             :     } else {
     258             :       cout << "input \t" ;
     259             :     }
     260             :     for (int jj = 0; jj < num_nodes[ii]; jj ++) {
     261             :       printf("%lf\t", output_of_each_layer[ii][jj]);
     262             :     }
     263             :     printf("\n");
     264             :   }
     265             :   printf("\n");
     266             : #endif
     267             :   return;
     268             : }
     269             : 
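                     : // back_prop fills derivatives_of_each_layer[l][m] with the derivative of the
                     : // selected output component with respect to output_of_each_layer[l][m],
                     : // sweeping backwards from the output layer; on return, row 0 holds the
                     : // gradient with respect to the input arguments.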
     270        2184 : void ANN::back_prop(vector<vector<double> >& derivatives_of_each_layer, int index_of_output_component) {
      271        2184 :   derivatives_of_each_layer = output_of_each_layer;  // same structure and sizes, so a deep copy gives correctly sized storage
     272             :   // first calculate derivatives for bottleneck layer
     273        5460 :   for (int ii = 0; ii < num_nodes[num_nodes.size() - 1]; ii ++ ) {
     274        3276 :     if (ii == index_of_output_component) {
     275        2184 :       derivatives_of_each_layer[num_nodes.size() - 1][ii] = 1;
     276             :     } else {
     277        1092 :       derivatives_of_each_layer[num_nodes.size() - 1][ii] = 0;
     278             :     }
     279             :   }
      280             :   // then use back propagation to calculate derivatives for the previous layers
     281        6552 :   for (int jj = num_nodes.size() - 2; jj >= 0; jj --) {
     282        4368 :     if (activations[jj] == string("Circular")) {
     283             :       vector<double> temp_derivative_of_input_for_this_layer;
     284        1092 :       temp_derivative_of_input_for_this_layer.resize(num_nodes[jj + 1]);
     285             : #ifdef DEBUG
     286             :       assert (num_nodes[jj + 1] % 2 == 0);
     287             : #endif
      288             :       // first calculate the derivative with respect to the input of this circular layer from the derivative with respect to its output
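                     :       // for a circular pair with r = sqrt(x_p^2 + x_q^2) the outputs are x_p / r
                     :       // and x_q / r, so by the quotient rule
                     :       //   d(x_p/r)/dx_p =  x_q^2 / r^3,    d(x_p/r)/dx_q = -x_p*x_q / r^3
                     :       //   d(x_q/r)/dx_q =  x_p^2 / r^3,    d(x_q/r)/dx_p = -x_p*x_q / r^3
                     :       // which gives the factored expressions below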
     289        2184 :       for(int ii = 0; ii < num_nodes[jj + 1] / 2; ii ++) {
     290             :         // printf("size of input_of_each_layer[%d] = %d\n",jj,  input_of_each_layer[jj].size());
     291        1092 :         double x_p = input_of_each_layer[jj + 1][2 * ii];
     292        1092 :         double x_q = input_of_each_layer[jj + 1][2 * ii + 1];
     293        1092 :         double radius = sqrt(x_p * x_p + x_q * x_q);
     294        1092 :         temp_derivative_of_input_for_this_layer[2 * ii] = x_q / (radius * radius * radius)
     295        1092 :             * (x_q * derivatives_of_each_layer[jj + 1][2 * ii]
     296        1092 :                - x_p * derivatives_of_each_layer[jj + 1][2 * ii + 1]);
     297        1092 :         temp_derivative_of_input_for_this_layer[2 * ii + 1] = x_p / (radius * radius * radius)
     298        1092 :             * (x_p * derivatives_of_each_layer[jj + 1][2 * ii + 1]
     299        1092 :                - x_q * derivatives_of_each_layer[jj + 1][2 * ii]);
     300             :       }
     301             : #ifdef DEBUG
     302             :       for (int mm = 0; mm < num_nodes[jj + 1]; mm ++) {
     303             :         printf("temp_derivative_of_input_for_this_layer[%d] = %lf\n", mm, temp_derivative_of_input_for_this_layer[mm]);
     304             :         printf("derivatives_of_each_layer[%d + 1][%d] = %lf\n", jj, mm, derivatives_of_each_layer[jj + 1][mm]);
     305             :       }
     306             : #endif
      307             :       // then calculate the derivative with respect to the output of layer jj from the derivative with respect to the input of layer (jj + 1)
     308        4368 :       for (int mm = 0; mm < num_nodes[jj]; mm ++) {
     309        3276 :         derivatives_of_each_layer[jj][mm] = 0;
     310        9828 :         for (int kk = 0; kk < num_nodes[jj + 1]; kk ++) {
     311        6552 :           derivatives_of_each_layer[jj][mm] += coeff[jj][kk][mm] \
     312        6552 :                                                * temp_derivative_of_input_for_this_layer[kk];
     313             : #ifdef DEBUG
     314             :           printf("derivatives_of_each_layer[%d][%d] = %lf\n", jj, mm, derivatives_of_each_layer[jj][mm]);
     315             :           printf("coeff[%d][%d][%d] = %lf\n", jj, kk, mm, coeff[jj][kk][mm]);
     316             : #endif
     317             :         }
     318             :       }
      319             :       // TODO: passes all tests, although there previously seemed to be a problem here
     320             :     } else {
     321       10920 :       for (int mm = 0; mm < num_nodes[jj]; mm ++) {
     322        7644 :         derivatives_of_each_layer[jj][mm] = 0;
     323       24024 :         for (int kk = 0; kk < num_nodes[jj + 1]; kk ++) {
     324       16380 :           if (activations[jj] == string("Tanh")) {
      325             :             // Tanh: d tanh(z)/dz = 1 - tanh(z)^2, evaluated from the stored layer output
     326       14742 :             derivatives_of_each_layer[jj][mm] += derivatives_of_each_layer[jj + 1][kk] \
     327       14742 :                                                  * coeff[jj][kk][mm] \
     328       14742 :                                                  * (1 - output_of_each_layer[jj + 1][kk] * output_of_each_layer[jj + 1][kk]);
     329        1638 :           } else if (activations[jj] == string("Linear")) {
      330             :             // Linear: the derivative of the activation is 1
     331        1638 :             derivatives_of_each_layer[jj][mm] += derivatives_of_each_layer[jj + 1][kk] \
     332        1638 :                                                  * coeff[jj][kk][mm] \
     333        1638 :                                                  * 1;
     334             :           } else {
      335             :             printf("activation function type not found!\n\n");
     336           0 :             return;
     337             :           }
     338             :         }
     339             :       }
     340             :     }
     341             :   }
     342             : #ifdef DEBUG
     343             :   // print out the result for debugging
     344             :   printf("derivatives_of_each_layer = \n");
     345             :   for (int ii = 0; ii < num_layers; ii ++) {
     346             :     printf("layer[%d]: ", ii);
     347             :     for (int jj = 0; jj < num_nodes[ii]; jj ++) {
     348             :       printf("%lf\t", derivatives_of_each_layer[ii][jj]);
     349             :     }
     350             :     printf("\n");
     351             :   }
     352             :   printf("\n");
     353             : #endif
     354             :   return;
     355             : }
     356             : 
     357        1638 : void ANN::calculate() {
     358             : 
     359        1638 :   vector<double> input_layer_data(num_nodes[0]);
     360        4914 :   for (int ii = 0; ii < num_nodes[0]; ii ++) {
     361        3276 :     input_layer_data[ii] = getArgument(ii);
     362             :   }
     363             : 
     364        1638 :   calculate_output_of_each_layer(input_layer_data);
     365             :   vector<vector<double> > derivatives_of_each_layer;
     366             : 
     367        3822 :   for (int ii = 0; ii < num_nodes[num_layers - 1]; ii ++) {
     368        2184 :     back_prop(derivatives_of_each_layer, ii);
     369        2184 :     string name_of_this_component = "node-" + to_string(ii);
     370        2184 :     Value* value_new=getPntrToComponent(name_of_this_component);
     371        2184 :     value_new -> set(output_of_each_layer[num_layers - 1][ii]);
     372        6552 :     for (int jj = 0; jj < num_nodes[0]; jj ++) {
     373        4368 :       value_new -> setDerivative(jj, derivatives_of_each_layer[0][jj]);  // TODO: setDerivative or addDerivative?
     374             :     }
     375             : #ifdef DEBUG_3
     376             :     printf("derivatives = ");
     377             :     for (int jj = 0; jj < num_nodes[0]; jj ++) {
     378             :       printf("%f ", value_new -> getDerivative(jj));
     379             :     }
     380             :     printf("\n");
     381             : #endif
     382             :   }
     383             : 
     384        3276 : }
     385             : 
     386             : }
     387             : }
     388             : }

Generated by: LCOV version 1.16