added full model hessian and fisher_inv

Hoeze · Hoeze · commit 5cfbe46adcf0 · 2018-08-06T04:09:08.000+02:00
diff --git a/batchglm/models/nb_glm/base.py b/batchglm/models/nb_glm/base.py
@@ -35,9 +35,8 @@
 ESTIMATOR_PARAMS.update({
     "loss": (),
     "gradient": ("features",),
-    "hessian_diagonal": ("features", "variables",),
-    "fisher_loc": ("design_loc_params", "features"),
-    "fisher_scale": ("design_scale_params", "features"),
+    "hessians": ("features", "delta_var0", "delta_var1"),
+    "fisher_inv": ("features", "delta_var0", "delta_var1"),
 })
 
 
@@ -393,7 +392,7 @@ def validate_data(self, **kwargs):
     
     def __init__(self, estim: AbstractEstimator):
         input_data = estim.input_data
-        params = estim.to_xarray(["a", "b", "loss", "gradient", "fisher_loc", "fisher_scale"], coords=input_data.data)
+        params = estim.to_xarray(["a", "b", "loss", "gradient", "hessians", "fisher_inv"], coords=input_data.data)
         
         XArrayModel.__init__(self, input_data, params)
     
@@ -410,9 +409,9 @@ def gradient(self):
         return self.params["loss"]
     
     @property
-    def fisher_loc(self):
-        return self.params["fisher_loc"]
+    def hessians(self):
+        return self.params["hessians"]
     
     @property
-    def fisher_scale(self):
-        return self.params["fisher_scale"]
+    def fisher_inv(self):
+        return self.params["fisher_inv"]
diff --git a/batchglm/train/tf/base.py b/batchglm/train/tf/base.py
@@ -13,6 +13,25 @@
 from .train import StopAtLossHook, TimedRunHook
 
 
+# def model_param(f: callable, key: str, param_dict):
+#     """
+#     Special decorator for TFEstimator's model params.
+#
+#     :param f: the function to decorate
+#     :param key: the name of the data item to fetch
+#     :param param_dict: the dict where to add the function
+#     :return: decorated function without the "data" parameter
+#     """
+#
+#     def wrap_fn(self, *args, **kwargs):
+#         data = self._get_unsafe(key)
+#         return f(self, data, *args, **kwargs)
+#
+#     param_dict[key] = wrap_fn
+#
+#     return wrap_fn
+
+
 class TFEstimatorGraph(metaclass=abc.ABCMeta):
     graph: tf.Graph
     loss: tf.Tensor
@@ -88,9 +107,13 @@ class TrainingStrategy(Enum):
     session: tf.Session
     feed_dict: Dict[Union[Union[tf.Tensor, tf.Operation], Any], Any]
     
+    _param_decorators: Dict[str, callable]
+    
     def __init__(self, tf_estimator_graph):
         self.model = tf_estimator_graph
         self.session = None
+        
+        self._param_decorators = dict()
     
     def initialize(self):
         self.close_session()
@@ -135,11 +158,11 @@ def get(self, key: Union[str, Iterable]) -> Union[Any, Dict[str, Any]]:
     
     @property
     def global_step(self):
-        return self.get("global_step")
+        return self._get_unsafe("global_step")
     
     @property
     def loss(self):
-        return self.get("loss")
+        return self._get_unsafe("loss")
     
     def _train_to_convergence(self,
                               train_op,
diff --git a/batchglm/train/tf/nb_glm/estimator.py b/batchglm/train/tf/nb_glm/estimator.py
@@ -199,20 +199,24 @@ def feature_wise_hessians(X, design_loc, design_scale, a, b, size_factors=None)
     
     def hessian(data):  # data is tuple (X_t, a_t, b_t)
         X_t, a_t, b_t = data
-        X = tf.transpose(X_t)
-        a = tf.transpose(a_t)
-        b = tf.transpose(b_t)
+        X = tf.transpose(X_t)  # observations x features
+        a = tf.transpose(a_t)  # design_loc_params x features
+        b = tf.transpose(b_t)  # design_scale_params x features
         
-        model = BasicModelGraph(X, design_loc, design_scale, a, b, size_factors=size_factors)
+        # concatenate a and b into one tensor so tf.hessians also yields the mixed second derivatives d^2(-LL)/(da db)
+        param_vec = tf.concat([a, b], axis=0, name="param_vec")
+        a_split, b_split = tf.split(param_vec, tf.TensorShape([a.shape[0], b.shape[0]]))
         
-        hess = tf.hessians(-model.log_likelihood, [a, b])
+        model = BasicModelGraph(X, design_loc, design_scale, a_split, b_split, size_factors=size_factors)
+        
+        hess = tf.hessians(-model.log_likelihood, param_vec)
         
         return hess
     
     hessians = tf.map_fn(
         fn=hessian,
         elems=(X_t, a_t, b_t),
-        dtype=[tf.float32, tf.float32],  # hessians of [a, b]
+        dtype=[tf.float32],  # a single hessian w.r.t. the concatenated [a, b] parameter tensor
         parallel_iterations=pkg_constants.TF_LOOP_PARALLEL_ITERATIONS
     )
     
@@ -271,6 +275,7 @@ def hessian_red(prev, cur):
                 reduce_fn=hessian_red,
                 parallel_iterations=1,
             )
+            hessians = hessians[0]
         
         self.X = model.X
         self.design_loc = model.design_loc
@@ -395,17 +400,17 @@ def __init__(
                 )
                 full_data_loss = full_data_model.loss
                 
-                with tf.name_scope("hessian_diagonal"):
-                    hessian_diagonal = [
-                        tf.map_fn(
-                            # elems=tf.transpose(hess, perm=[2, 0, 1]),
-                            elems=hess,
-                            fn=tf.diag_part,
-                            parallel_iterations=pkg_constants.TF_LOOP_PARALLEL_ITERATIONS
-                        )
-                        for hess in full_data_model.hessians
-                    ]
-                    fisher_a, fisher_b = hessian_diagonal
+                # with tf.name_scope("hessian_diagonal"):
+                #     hessian_diagonal = [
+                #         tf.map_fn(
+                #             # elems=tf.transpose(hess, perm=[2, 0, 1]),
+                #             elems=hess,
+                #             fn=tf.diag_part,
+                #             parallel_iterations=pkg_constants.TF_LOOP_PARALLEL_ITERATIONS
+                #         )
+                #         for hess in full_data_model.hessians
+                #     ]
+                #     fisher_a, fisher_b = hessian_diagonal
                 
                 mu = full_data_model.mu
                 r = full_data_model.r
@@ -552,12 +557,8 @@ def __init__(
         
         # we are minimizing the negative LL instead of maximizing the LL
         # => invert hessians
-        self.hessian_diagonal = - tf.concat([
-            fisher_a,
-            fisher_b,
-        ], axis=-1)
-        self.fisher_loc = tf.transpose(fisher_a, name="fisher_loc")
-        self.fisher_scale = tf.transpose(fisher_b, name="fisher_scale")
+        self.hessians = - full_data_model.hessians
+        self.fisher_inv = tf.matrix_inverse(full_data_model.hessians)
         
         with tf.name_scope('summaries'):
             tf.summary.histogram('a', model_vars.a)
@@ -715,7 +716,7 @@ def __init__(self,
                     X = input_data.X.assign_coords(group=(("observations",), inverse_idx))
                     mean = X.groupby("group").mean(dim="observations")
                     
-                    [X[inverse_idx==i].mean(dim="observations").values for i in np.unique(inv_design)]
+                    [X[inverse_idx == i].mean(dim="observations").values for i in np.unique(inv_design)]
                     a = np.log(mean)
                     # a = a * np.eye(np.size(a))
                     a_prime = np.matmul(inv_design, a)
@@ -978,16 +979,12 @@ def gradient(self):
         return self.to_xarray("full_gradient", coords=self.input_data.data.coords)
     
     @property
-    def hessian_diagonal(self):
-        return self.to_xarray("hessian_diagonal", coords=self.input_data.data.coords)
-    
-    @property
-    def fisher_loc(self):
-        return self.to_xarray("fisher_loc", coords=self.input_data.data.coords)
+    def hessians(self):
+        return self.to_xarray("hessians", coords=self.input_data.data.coords)
     
     @property
-    def fisher_scale(self):
-        return self.to_xarray("fisher_scale", coords=self.input_data.data.coords)
+    def fisher_inv(self):
+        return self.to_xarray("fisher_inv", coords=self.input_data.data.coords)
     
     def finalize(self):
         store = XArrayEstimatorStore(self)
diff --git a/batchglm/unit_test/test_nb.py b/batchglm/unit_test/test_nb.py
@@ -54,7 +54,7 @@ def test_default_fit(self):
         estimator = estimator.finalize()
         print(estimator.mu.values)
         print(estimator.gradient.values)
-        print(estimator.hessian_diagonal.values)
+        print(estimator.hessians.values)
         print(estimator.probs().values)
         print(estimator.log_probs().values)
 
diff --git a/batchglm/unit_test/test_nb_glm.py b/batchglm/unit_test/test_nb_glm.py
@@ -30,7 +30,7 @@ def estimate(input_data: InputData, working_dir: str):
     )
     input_data.save(os.path.join(working_dir, "input_data.h5"))
     
-    estimator.train_sequence()
+    estimator.train_sequence(training_strategy="QUICK")
     
     return estimator
 
@@ -69,7 +69,7 @@ def test_default_fit(self):
         estimator = estimator.finalize()
         print(estimator.mu.values)
         print(estimator.gradient.values)
-        print(estimator.hessian_diagonal.values)
+        print(estimator.hessians.values)
         print(estimator.probs().values)
         print(estimator.log_probs().values)
         
@@ -116,7 +116,8 @@ def test_nonconfounded_fit(self):
         estimator = estimator.finalize()
         print(estimator.mu.values)
         print(estimator.gradient.values)
-        print(estimator.hessian_diagonal.values)
+        print(estimator.hessians.values)
+        print(estimator.fisher_inv.values)
         print(estimator.probs().values)
         print(estimator.log_probs().values)