diff --git a/roll/distributed/scheduler/resource_manager.py b/roll/distributed/scheduler/resource_manager.py index ac9810f4..ee2abc93 100644 --- a/roll/distributed/scheduler/resource_manager.py +++ b/roll/distributed/scheduler/resource_manager.py @@ -89,7 +89,8 @@ def nodes_placement_group(self, node_rank) -> PlacementGroup: return self.node2pg[node_rank] def destroy_placement_group(self): - [ray.util.remove_placement_group(pg) for pg in self.placement_groups] + for pg in self.placement_groups: + ray.util.remove_placement_group(pg) def allocate_placement_group(self, world_size, device_mapping: List[int] = None) -> List[List[Dict]]: """ diff --git a/roll/utils/network_utils.py b/roll/utils/network_utils.py index a9719f6d..33ee6adc 100644 --- a/roll/utils/network_utils.py +++ b/roll/utils/network_utils.py @@ -3,8 +3,11 @@ def get_node_ip(): s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) - s.connect(("8.8.8.8", 80)) - return s.getsockname()[0] + try: + s.connect(("8.8.8.8", 80)) + return s.getsockname()[0] + finally: + s.close() def collect_free_port():