-
-
Notifications
You must be signed in to change notification settings - Fork 52
fix(manager): detect existing server on startup, avoid port conflict #123
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
1de7232
5af82d8
cc60155
19c6a98
3abf6e5
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -3,9 +3,11 @@ | |
| import logging | ||
| import subprocess | ||
| import platform | ||
| import urllib.error | ||
| import urllib.request | ||
| from pathlib import Path | ||
| from glob import glob | ||
| from time import sleep | ||
| from time import monotonic, sleep | ||
| from typing import Optional, List, Hashable, Set, Iterable | ||
|
|
||
| import aw_core | ||
|
|
@@ -148,6 +150,10 @@ def __init__(self, name: str, path: Path, type: str) -> None: | |
| # self.location = "system" if _is_system_module(name) else "bundled" | ||
| self._process: Optional[subprocess.Popen[str]] = None | ||
| self._last_process: Optional[subprocess.Popen[str]] = None | ||
| self._external_server: bool = False # True if we detected an already-running server | ||
| self._external_server_testing: bool = False | ||
| self._external_server_probe_cache: Optional[bool] = None | ||
| self._external_server_probe_cache_at: float = 0.0 | ||
|
|
||
| def __hash__(self) -> int: | ||
| return hash((self.name, self.path)) | ||
|
|
@@ -158,9 +164,61 @@ def __eq__(self, other: Hashable) -> bool: | |
| def __repr__(self) -> str: | ||
| return f"<Module {self.name} at {self.path}>" | ||
|
|
||
| def _get_server_port(self, testing: bool) -> Optional[int]: | ||
| if self.name not in ("aw-server", "aw-server-rust"): | ||
| return None | ||
|
|
||
| from .config import _read_aw_server_port, _read_server_rust_port | ||
|
|
||
| if self.name == "aw-server": | ||
| return _read_aw_server_port(testing) or (5666 if testing else 5600) | ||
| return _read_server_rust_port(testing) or (5666 if testing else 5600) | ||
|
|
||
| def _probe_external_server(self, testing: bool, timeout: float = 0.2) -> bool: | ||
| port = self._get_server_port(testing) | ||
| if port is None: | ||
| return False | ||
|
|
||
| try: | ||
| with urllib.request.urlopen( | ||
| f"http://localhost:{port}/api/0/info", timeout=timeout | ||
| ): | ||
| return True | ||
| except (urllib.error.URLError, OSError): | ||
| return False | ||
|
|
||
| def _probe_external_server_cached(self, testing: bool, max_age: float = 1.0) -> bool: | ||
| now = monotonic() | ||
| if ( | ||
| self._external_server_probe_cache is not None | ||
| and now - self._external_server_probe_cache_at < max_age | ||
| ): | ||
| return self._external_server_probe_cache | ||
|
|
||
| alive = self._probe_external_server(testing) | ||
| self._external_server_probe_cache = alive | ||
| self._external_server_probe_cache_at = now | ||
| return alive | ||
|
|
||
| def start(self, testing: bool) -> None: | ||
| logger.info(f"Starting module {self.name}") | ||
|
|
||
| # For server modules, check if a server is already running before attempting | ||
| # to start one. This avoids port conflicts and the confusing "Restart" requirement | ||
| # when aw-server is managed externally (e.g. via systemd or Docker). | ||
| if self._probe_external_server(testing, timeout=1.0): | ||
| port = self._get_server_port(testing) | ||
| logger.info( | ||
| f"{self.name}: server already running on port {port}, " | ||
| "using existing instance instead of starting a new one" | ||
| ) | ||
| self._external_server = True | ||
| self._external_server_testing = testing | ||
| self._external_server_probe_cache = True | ||
| self._external_server_probe_cache_at = monotonic() | ||
| self.started = True | ||
| return | ||
|
|
||
| exec_cmd = [str(self.path)] | ||
| if testing: | ||
| exec_cmd.append("--testing") | ||
|
|
@@ -195,6 +253,17 @@ def stop(self) -> None: | |
| f"Tried to stop module {self.name}, but it hasn't been started" | ||
| ) | ||
| return | ||
| elif self._external_server: | ||
| # We didn't start this server, so don't stop it — it's managed externally | ||
| logger.info( | ||
| f"Module {self.name} is using an external server instance, not stopping it" | ||
| ) | ||
| self._external_server = False | ||
| self._external_server_testing = False | ||
| self._external_server_probe_cache = None | ||
| self._external_server_probe_cache_at = 0.0 | ||
| self.started = False | ||
| return | ||
| elif not self.is_alive(): | ||
| logger.warning(f"Tried to stop module {self.name}, but it wasn't running") | ||
| else: | ||
|
|
@@ -223,6 +292,17 @@ def toggle(self, testing: bool) -> None: | |
| self.start(testing) | ||
|
|
||
| def is_alive(self) -> bool: | ||
| if self._external_server: | ||
| # We don't own this process, so re-probe the server instead. | ||
| # Cache short-lived probe results to avoid blocking repeated UI poll | ||
| # cycles on the main thread when multiple checks happen close together. | ||
| alive = self._probe_external_server_cached(self._external_server_testing) | ||
| if not alive: | ||
| self._external_server = False | ||
| self._external_server_testing = False | ||
| self._external_server_probe_cache = None | ||
| self._external_server_probe_cache_at = 0.0 | ||
| return alive | ||
|
Comment on lines
294
to
+305
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Repeated blocking HTTP probe inside
Unlike the one-off probe in import time
_PROBE_CACHE_TTL = 5.0 # seconds
def is_alive(self) -> bool:
if self._external_server:
now = time.monotonic()
if now - getattr(self, "_last_probe_time", 0) >= _PROBE_CACHE_TTL:
alive = self._probe_external_server(self._external_server_testing)
self._last_probe_time = now
self._last_probe_result = alive
if not alive:
self._external_server = False
self._external_server_testing = False
return alive
return self._last_probe_result
... |
||
| if self._process is None: | ||
| return False | ||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Startup probe timeout too short — may miss a loaded server
_probe_external_serverusestimeout=0.2for both the one-shot startup check (called fromstart()) and the repeated liveness probe (called fromis_alive()via_probe_external_server_cached).The 0.2 s limit makes sense for liveness polls to avoid stalling the Qt main thread. However, the startup probe in
start()fires only once per session and blocks just as long either way. If the external server is momentarily loaded or slow (e.g., JVM / Python startup warmup), the probe can time out and returnFalse, causing aw-qt to attempt launching a second instance — triggering the exact "address already in use" error this PR is intended to fix.Consider using a separate, larger timeout for the startup case. One clean approach is a dedicated parameter on
_probe_external_server:Then in
start():This keeps the liveness path at 0.2 s while giving the startup probe a fighting chance on a loaded system.