From 20765c150eeefdb72c6e4c0c00844566ee186f97 Mon Sep 17 00:00:00 2001 From: Jonas Rembser Date: Mon, 4 Sep 2023 14:49:46 +0200 Subject: [PATCH] [Python] Set memory policy to "strict" The ROOT Python interfaces have many memory leaks, which is a major pain point for people using it for long-running scripts in batch jobs. One source of memory leaks was indentified to be the "heuristic memory policy" of cppyy. This means that cppyy assumes that every non-const pointer member function argument was interpreted as the object taking ownership if the argument. For examle, take the non-owning RooLinkedList container. It has a `RooLinkedList::Add(RooAbsArg *arg)` method. ROOT wrongly assumes that this means the RooLinkedList takes ownership of arg, and it drops the ROOT overship. Nobody feels responsible for deleting the object anymore, and there is a memory leak or `arg`. That particular leak was reported in this forum post: https://root-forum.cern.ch/t/memory-leak-in-fits/56249 Function parameters of type `T *` are very common in ROOT, and only rarely do they imply ownership transfer. So changing the memory policy to "strict" would surely fix also many other memory leaks that are not reported so far. In fact, upstream cppyy doesn't even have this heuristic memory policy anymore! So moving ROOT also to the strict memory policy closes the gap between ROOT and cppyy. The potential drawback of this change are crashes in usercode if memory is not properly managed. But these problems should either be fixed by: * the user * dedicated pythonizations for these methods to manage shared ownership via Python reference counters (i.e., setting the parameter as an attribute of the object that the member function was called on) This follows up on PR #4294, in particular it reverts 3a12063116. Note that owning **TCollection**, **TSeqCollection**, and **TList** instances are Pythonized to preserve the old behavior of dropping Python ownership of added elements, as that was the case where the memory heuristic was correct. --- README/ReleaseNotes/v640/index.md | 51 ++++++++++++++++++- bindings/pyroot/pythonizations/CMakeLists.txt | 1 + .../pythonizations/python/ROOT/_facade.py | 19 ++++--- .../ROOT/_pythonization/_memory_utils.py | 25 +++++++++ .../ROOT/_pythonization/_roofit/__init__.py | 2 + .../ROOT/_pythonization/_roofit/_rooabspdf.py | 12 +++-- .../ROOT/_pythonization/_roofit/_rooplot.py | 20 ++++++++ .../ROOT/_pythonization/_tcollection.py | 15 ++++++ .../python/ROOT/_pythonization/_tgraph.py | 19 +++++++ .../ROOT/_pythonization/_tseqcollection.py | 39 ++++++++++++++ .../test/tcollection_listmethods.py | 6 ++- .../test/tseqcollection_itemaccess.py | 13 ++++- .../test/tseqcollection_listmethods.py | 1 + 13 files changed, 209 insertions(+), 14 deletions(-) create mode 100644 bindings/pyroot/pythonizations/python/ROOT/_pythonization/_roofit/_rooplot.py diff --git a/README/ReleaseNotes/v640/index.md b/README/ReleaseNotes/v640/index.md index 433e1bd6124b6..328da025e1285 100644 --- a/README/ReleaseNotes/v640/index.md +++ b/README/ReleaseNotes/v640/index.md @@ -39,7 +39,7 @@ The following people have contributed to this new version: ## Removals * The `TH1K` class was removed. `TMath::KNNDensity` can be used in its stead. -* The `TObject` equality operator pythonization (`TObject.__eq__`) that was deprecated in ROOT 6.38 and scheduled for removal in ROOT 6.40 is removed. +* The `TObject` equality operator Pythonization (`TObject.__eq__`) that was deprecated in ROOT 6.38 and scheduled for removal in ROOT 6.40 is removed. * Comparing C++ `nullptr` objects with `None` in Python now raises a `TypeError`, as announced in the ROOT 6.38 release notes. Use truth-value checks like `if not x` or `x is None` instead. * The `TGLIncludes.h` and `TGLWSIncludes.h` that were deprecated in ROOT 6.38 and scheduled for removal are gone now. Please include your required headers like `` or `` directly. * The GLEW headers (`GL/eglew.h`, `GL/glew.h`, `GL/glxew.h`, and `GL/wglew.h`) that were installed when building ROOT with `builtin_glew=ON` are no longer installed. This is done because ROOT is moving away from GLEW for loading OpenGL extensions. @@ -122,6 +122,55 @@ As part of this migration, the following build options are deprecated. From ROOT ROOT dropped support for Python 3.9, meaning ROOT now requires at least Python 3.10. +### Changed ownership policy for non-`const` pointer member function parameters + +If you have a member function taking a raw pointer, like `MyClass::foo(T *obj)`, +calling such method on a Python object `my_instance` of type `MyClass` +would assume that the memory ownership of `obj` transfers to `my_instance`. + +However, this resulted in many memory leaks, since many functions with such a +signature actually don't take ownership of the object. + +Now, the Python interface of ROOT will not make this assumption anymore. +Because of this change, some double-deletes or dangling references on the C++ +side might creep up in your scripts. These need to be fixed by properly +managing object lifetime. + +A dangling references on the C++ side is a reference or pointer that refers to +an object that the Python side has already deleted. +Possible remedies: + + 1. Assigning the object to a Python variable that lives at least as long as + the C++ reference to keep the pointed-to object alive + 2. If the C++ reference comes from a non-owning collection, like a + default-constructed **TCollection** (e.g. a **TList**), you can also transfer + the ownership to the C++ side explicitly by calling `coll.SetOwner(True)` + or migrate to another owning collection type like + `std::vector>`. In the general case of owning collections, + you will have to relinquish ownership on the Python side with + `ROOT.SetOwnership(obj, False)`. + +**Note:** **TCollection**-derived classes are Pythonized such that when an +object is added to an owning collection with `TCollection::Add()`, the Python +ownership is still dropped automatically. If you see other places where ROOT +can benefit from this Pythonization, please report them in a GitHub issue. You +can also [Pythonize classes](https://root.cern/doc/master/group__Pythonizations.html) from outside ROOT if needed. + +The double-delete problems can be fixed in a similar ways, but this time it's +not necessary to make sure that the object is owned by C++. It there was a +double-delete, that means the object was owned by C++ already. So the possible +solutions are: + + 1. Dropping the ownership on the Python side with `ROOT.SetOwnership(obj, False)` + 3. Pythonizing the member function that drops the ownership on the Python + side, like the **TCollection** Pythonization explained above + +**Important:** You can change back to the old policy by calling +`ROOT.SetHeuristicMemoryPolicy(True)` after importing ROOT, but this should be +only used for debugging purposes and this function might be **removed in ROOT +6.42**! + + ## Command-line utilities ## JavaScript ROOT diff --git a/bindings/pyroot/pythonizations/CMakeLists.txt b/bindings/pyroot/pythonizations/CMakeLists.txt index 563f5eb13e0a4..076dd3de1722b 100644 --- a/bindings/pyroot/pythonizations/CMakeLists.txt +++ b/bindings/pyroot/pythonizations/CMakeLists.txt @@ -38,6 +38,7 @@ if(roofit) ROOT/_pythonization/_roofit/_roojsonfactorywstool.py ROOT/_pythonization/_roofit/_roomcstudy.py ROOT/_pythonization/_roofit/_roomsgservice.py + ROOT/_pythonization/_roofit/_rooplot.py ROOT/_pythonization/_roofit/_rooprodpdf.py ROOT/_pythonization/_roofit/_roorealvar.py ROOT/_pythonization/_roofit/_roosimultaneous.py diff --git a/bindings/pyroot/pythonizations/python/ROOT/_facade.py b/bindings/pyroot/pythonizations/python/ROOT/_facade.py index 2b91ea4b6de0d..16ed2cc17a4fc 100644 --- a/bindings/pyroot/pythonizations/python/ROOT/_facade.py +++ b/bindings/pyroot/pythonizations/python/ROOT/_facade.py @@ -77,7 +77,6 @@ def __init__(self, module, is_ipython): "bind_object", "as_cobject", "addressof", - "SetHeuristicMemoryPolicy", "SetImplicitSmartPointerConversion", "SetOwnership", ] @@ -103,6 +102,19 @@ def __init__(self, module, is_ipython): self.__class__.__getattr__ = self._getattr self.__class__.__setattr__ = self._setattr + def SetHeuristicMemoryPolicy(self, enabled): + import textwrap + import warnings + + msg = """ROOT.SetHeuristicMemoryPolicy() is deprecated and will be removed in ROOT 6.42. + + Since ROOT 6.40, the heuristic memory policy is disabled by default, and with + ROOT 6.42 it won't be possible to re-enable it with ROOT.SetHeuristicMemoryPolicy(), + which was only meant to be used during a transition period and will be removed. + """ + warnings.warn(textwrap.dedent(msg), FutureWarning, stacklevel=0) + return cppyy._backend.SetHeuristicMemoryPolicy(enabled) + def AddressOf(self, obj): # Return an indexable buffer of length 1, whose only element # is the address of the object. @@ -192,11 +204,6 @@ def _finalSetup(self): if not self.gROOT.IsBatch() and self.PyConfig.StartGUIThread: self.app.init_graphics() - # Set memory policy to kUseHeuristics. - # This restores the default in PyROOT which was changed - # by new Cppyy - self.SetHeuristicMemoryPolicy(True) - # The automatic conversion of ordinary obejcts to smart pointers is # disabled for ROOT because it can cause trouble with overload # resolution. If a function has overloads for both ordinary objects and diff --git a/bindings/pyroot/pythonizations/python/ROOT/_pythonization/_memory_utils.py b/bindings/pyroot/pythonizations/python/ROOT/_pythonization/_memory_utils.py index 350012b2e687d..a26d474ba50a2 100644 --- a/bindings/pyroot/pythonizations/python/ROOT/_pythonization/_memory_utils.py +++ b/bindings/pyroot/pythonizations/python/ROOT/_pythonization/_memory_utils.py @@ -63,3 +63,28 @@ def _SetDirectory_SetOwnership(self, dir): import ROOT ROOT.SetOwnership(self, False) + + +def declare_cpp_owned_arg(position, name, positional_args, keyword_args, condition=lambda _: True): + """ + Helper function to drop Python ownership of a specific function argument + with a given position and name, referring to the C++ signature. + + positional_args and keyword_args should be the usual args and kwargs passed + to the function, and condition is an optional condition on which the Python + ownership is dropped. + """ + import ROOT + + # has to match the C++ argument name + if name in keyword_args: + arg = keyword_args[name] + elif len(positional_args) > position: + arg = positional_args[0] + else: + # This can happen if the function was called with too few arguments. + # In that case we should not do anything. + return + + if condition(arg): + ROOT.SetOwnership(arg, False) diff --git a/bindings/pyroot/pythonizations/python/ROOT/_pythonization/_roofit/__init__.py b/bindings/pyroot/pythonizations/python/ROOT/_pythonization/_roofit/__init__.py index 8168f17c81fbc..59abc3367d0ae 100644 --- a/bindings/pyroot/pythonizations/python/ROOT/_pythonization/_roofit/__init__.py +++ b/bindings/pyroot/pythonizations/python/ROOT/_pythonization/_roofit/__init__.py @@ -40,6 +40,7 @@ from ._roojsonfactorywstool import RooJSONFactoryWSTool from ._roomcstudy import RooMCStudy from ._roomsgservice import RooMsgService +from ._rooplot import RooPlot from ._rooprodpdf import RooProdPdf from ._roorealvar import RooRealVar from ._roosimultaneous import RooSimultaneous @@ -69,6 +70,7 @@ RooJSONFactoryWSTool, RooMCStudy, RooMsgService, + RooPlot, RooProdPdf, RooRealVar, RooSimultaneous, diff --git a/bindings/pyroot/pythonizations/python/ROOT/_pythonization/_roofit/_rooabspdf.py b/bindings/pyroot/pythonizations/python/ROOT/_pythonization/_roofit/_rooabspdf.py index 3a33c61037534..a7318154ae892 100644 --- a/bindings/pyroot/pythonizations/python/ROOT/_pythonization/_roofit/_rooabspdf.py +++ b/bindings/pyroot/pythonizations/python/ROOT/_pythonization/_roofit/_rooabspdf.py @@ -31,13 +31,19 @@ def _pack_cmd_args(*args, **kwargs): assert len(kwargs) == 0 # Put RooCmdArgs in a RooLinkedList - cmdList = ROOT.RooLinkedList() + cmd_list = ROOT.RooLinkedList() for cmd in args: if not isinstance(cmd, ROOT.RooCmdArg): raise TypeError("This function only takes RooFit command arguments.") - cmdList.Add(cmd) + cmd_list.Add(cmd) - return cmdList + # The RooLinkedList passed to functions like fitTo() is expected to be + # non-owning. To make sure that the RooCmdArgs live long enough, we attach + # then as an attribute of the output list, such that the Python reference + # counter doesn't hit zero. + cmd_list._owning_pylist = args + + return cmd_list class RooAbsPdf(RooAbsReal): diff --git a/bindings/pyroot/pythonizations/python/ROOT/_pythonization/_roofit/_rooplot.py b/bindings/pyroot/pythonizations/python/ROOT/_pythonization/_roofit/_rooplot.py new file mode 100644 index 0000000000000..bce298bc04e45 --- /dev/null +++ b/bindings/pyroot/pythonizations/python/ROOT/_pythonization/_roofit/_rooplot.py @@ -0,0 +1,20 @@ +# Authors: +# * Jonas Rembser 09/2023 + +################################################################################ +# Copyright (C) 1995-2020, Rene Brun and Fons Rademakers. # +# All rights reserved. # +# # +# For the licensing terms see $ROOTSYS/LICENSE. # +# For the list of contributors see $ROOTSYS/README/CREDITS. # +################################################################################ + + +class RooPlot(object): + def addObject(self, *args, **kwargs): + from ROOT._pythonization._memory_utils import declare_cpp_owned_arg + + # Python should transfer the ownership to the RooPlot + declare_cpp_owned_arg(0, "obj", args, kwargs) + + return self._addObject(*args, **kwargs) diff --git a/bindings/pyroot/pythonizations/python/ROOT/_pythonization/_tcollection.py b/bindings/pyroot/pythonizations/python/ROOT/_pythonization/_tcollection.py index b984f2c261ff4..c136c2e15fffa 100644 --- a/bindings/pyroot/pythonizations/python/ROOT/_pythonization/_tcollection.py +++ b/bindings/pyroot/pythonizations/python/ROOT/_pythonization/_tcollection.py @@ -93,11 +93,26 @@ def _iter_pyz(self): yield o +def _TCollection_Add(self, *args, **kwargs): + from ROOT._pythonization._memory_utils import declare_cpp_owned_arg + + def condition(_): + return self.IsOwner() + + declare_cpp_owned_arg(0, "obj", args, kwargs, condition=condition) + + self._Add(*args, **kwargs) + + @pythonization('TCollection') def pythonize_tcollection(klass): # Parameters: # klass: class to be pythonized + # Pythonize Add() + klass._Add = klass.Add + klass.Add = _TCollection_Add + # Add Python lists methods klass.append = klass.Add klass.remove = _remove_pyz diff --git a/bindings/pyroot/pythonizations/python/ROOT/_pythonization/_tgraph.py b/bindings/pyroot/pythonizations/python/ROOT/_pythonization/_tgraph.py index 0bc29dee280b5..5badaf186e83c 100644 --- a/bindings/pyroot/pythonizations/python/ROOT/_pythonization/_tgraph.py +++ b/bindings/pyroot/pythonizations/python/ROOT/_pythonization/_tgraph.py @@ -181,6 +181,9 @@ import cppyy +from . import pythonization + + def set_size(self, buf): # Parameters: # - self: graph object @@ -213,3 +216,19 @@ def set_size(self, buf): # Add the composite to the list of pythonizors cppyy.py.add_pythonization(comp) +def _TMultiGraph_Add(self, *args, **kwargs): + """ + The TMultiGraph always takes ownership of the added graphs. + """ + from ROOT._pythonization._memory_utils import declare_cpp_owned_arg + + declare_cpp_owned_arg(0, "graph", args, kwargs) + + self._Add(*args, **kwargs) + + +@pythonization("TMultiGraph") +def pythonize_tmultigraph(klass): + # Pythonizations for TMultiGraph::Add + klass._Add = klass.Add + klass.Add = _TMultiGraph_Add diff --git a/bindings/pyroot/pythonizations/python/ROOT/_pythonization/_tseqcollection.py b/bindings/pyroot/pythonizations/python/ROOT/_pythonization/_tseqcollection.py index b8589bad50542..188f69d3e5880 100644 --- a/bindings/pyroot/pythonizations/python/ROOT/_pythonization/_tseqcollection.py +++ b/bindings/pyroot/pythonizations/python/ROOT/_pythonization/_tseqcollection.py @@ -200,7 +200,10 @@ def _reverse_pyz(self): return t = tuple(self) + was_owner = self.IsOwner() + self.SetOwner(False) self.Clear() + self.SetOwner(was_owner) for elem in t: self.AddAt(elem, 0) @@ -222,7 +225,10 @@ def _sort_pyz(self, *args, **kwargs): # Sort in a Python list copy pylist = list(self) pylist.sort(*args, **kwargs) + was_owner = self.IsOwner() + self.SetOwner(False) self.Clear() + self.SetOwner(was_owner) self.extend(pylist) @@ -241,11 +247,30 @@ def _index_pyz(self, val): return idx +def _TSeqCollection_AddAt(self, *args, **kwargs): + from ROOT._pythonization._memory_utils import declare_cpp_owned_arg + + def condition(_): + return self.IsOwner() + + declare_cpp_owned_arg(0, "obj", args, kwargs, condition=condition) + + self._AddAt(*args, **kwargs) + + @pythonization('TSeqCollection') def pythonize_tseqcollection(klass): + from ROOT._pythonization._tcollection import _TCollection_Add + # Parameters: # klass: class to be pythonized + # Pythonize Add() methods + klass._Add = klass.Add + klass.Add = _TCollection_Add + klass._AddAt = klass.AddAt + klass.AddAt = _TSeqCollection_AddAt + # Item access methods klass.__getitem__ = _getitem_pyz klass.__setitem__ = _setitem_pyz @@ -257,3 +282,17 @@ def pythonize_tseqcollection(klass): klass.reverse = _reverse_pyz klass.sort = _sort_pyz klass.index = _index_pyz + + +@pythonization("TList") +def pythonize_tlist(klass): + from ROOT._pythonization._tcollection import _TCollection_Add + + # Parameters: + # klass: class to be pythonized + + # Pythonize Add() methods + klass._Add = klass.Add + klass.Add = _TCollection_Add + klass._AddAt = klass.AddAt + klass.AddAt = _TSeqCollection_AddAt diff --git a/bindings/pyroot/pythonizations/test/tcollection_listmethods.py b/bindings/pyroot/pythonizations/test/tcollection_listmethods.py index 496f7f8277c1e..246ac9de016f3 100644 --- a/bindings/pyroot/pythonizations/test/tcollection_listmethods.py +++ b/bindings/pyroot/pythonizations/test/tcollection_listmethods.py @@ -14,10 +14,9 @@ class TCollectionListMethods(unittest.TestCase): # Helpers def create_tcollection(self): c = ROOT.TList() + c.SetOwner(True) for _ in range(self.num_elems): o = ROOT.TObject() - # Prevent immediate deletion of C++ TObjects - ROOT.SetOwnership(o, False) c.Add(o) return c @@ -76,6 +75,7 @@ def test_extend(self): len1 = c1.GetEntries() len2 = c2.GetEntries() + c2.SetOwner(False) c1.extend(c2) len1_final = c1.GetEntries() @@ -105,6 +105,8 @@ def test_count(self): self.assertEqual(c.count(o1), 2) self.assertEqual(c.count(o2), 1) + c.Clear() + if __name__ == '__main__': unittest.main() diff --git a/bindings/pyroot/pythonizations/test/tseqcollection_itemaccess.py b/bindings/pyroot/pythonizations/test/tseqcollection_itemaccess.py index abf51a0ea1871..2def110a5c189 100644 --- a/bindings/pyroot/pythonizations/test/tseqcollection_itemaccess.py +++ b/bindings/pyroot/pythonizations/test/tseqcollection_itemaccess.py @@ -15,10 +15,9 @@ class TSeqCollectionItemAccess(unittest.TestCase): # Helpers def create_tseqcollection(self): sc = ROOT.TList() + sc.SetOwner(True) for _ in range(self.num_elems): o = ROOT.TObject() - # Prevent immediate deletion of C++ TObjects - ROOT.SetOwnership(o, False) sc.Add(o) return sc @@ -50,6 +49,7 @@ def test_getitem(self): def test_getitem_slice(self): sc = self.create_tseqcollection() + sc.SetOwner(False) # All items slice1 = sc[:] @@ -125,7 +125,9 @@ def test_setitem(self): def test_setitem_slice(self): sc1 = self.create_tseqcollection() + sc1.SetOwner(False) sc2 = self.create_tseqcollection() + sc2.SetOwner(False) # Replace all items sc1[:] = sc2 @@ -135,6 +137,7 @@ def test_setitem_slice(self): # Append items sc1 = self.create_tseqcollection() + sc1.SetOwner(False) l1 = [elem for elem in sc1] sc1[self.num_elems:] = sc2 @@ -151,6 +154,7 @@ def test_setitem_slice(self): # Assign second item. # This time use a Python list as assigned value sc3 = self.create_tseqcollection() + sc3.SetOwner(False) l2 = [ ROOT.TObject() ] l3 = [ elem for elem in sc3 ] @@ -164,6 +168,7 @@ def test_setitem_slice(self): # Assign second and third items to just one item. # This tests that the third item is removed sc4 = self.create_tseqcollection() + sc4.SetOwner(False) l4 = [ ROOT.TObject() ] l5 = [ elem for elem in sc4 ] @@ -175,6 +180,7 @@ def test_setitem_slice(self): # Assign with step sc5 = self.create_tseqcollection() + sc5.SetOwner(False) o = sc5[1] len6 = 2 l6 = [ ROOT.TObject() for _ in range(len6) ] @@ -196,6 +202,7 @@ def test_setitem_slice(self): # Step cannot be zero sc6 = self.create_tseqcollection() + sc6.SetOwner(False) with self.assertRaises(ValueError): sc6[::0] = [ ROOT.TObject() ] @@ -237,6 +244,8 @@ def test_delitem(self): with self.assertRaises(TypeError): del sc[1.0] + sc.Clear() + def test_delitem_slice(self): # Delete all items sc1 = self.create_tseqcollection() diff --git a/bindings/pyroot/pythonizations/test/tseqcollection_listmethods.py b/bindings/pyroot/pythonizations/test/tseqcollection_listmethods.py index c2161f39e376c..55d1428f05550 100644 --- a/bindings/pyroot/pythonizations/test/tseqcollection_listmethods.py +++ b/bindings/pyroot/pythonizations/test/tseqcollection_listmethods.py @@ -14,6 +14,7 @@ class TSeqCollectionListMethods(unittest.TestCase): # Helpers def create_tseqcollection(self): sc = ROOT.TList() + sc.SetOwner(True) for i in reversed(range(self.num_elems)): sc.Add(ROOT.TObjString(str(i)))