diff --git a/superset/commands/report/execute.py b/superset/commands/report/execute.py index 992772165fca..f5cba4b18d02 100644 --- a/superset/commands/report/execute.py +++ b/superset/commands/report/execute.py @@ -525,7 +525,11 @@ def _get_csv_data(self) -> bytes: self._update_query_context() try: - csv_data = get_chart_csv_data(chart_url=url, auth_cookies=auth_cookies) + csv_data = get_chart_csv_data( + chart_url=url, + auth_cookies=auth_cookies, + timeout=app.config["ALERT_REPORTS_CSV_REQUEST_TIMEOUT"], + ) elapsed_seconds = (datetime.utcnow() - start_time).total_seconds() logger.info( "CSV data generation from %s as user %s took %.2fs - execution_id: %s", @@ -575,7 +579,11 @@ def _get_embedded_data(self) -> pd.DataFrame: self._update_query_context() try: - dataframe = get_chart_dataframe(url, auth_cookies) + dataframe = get_chart_dataframe( + url, + auth_cookies, + timeout=app.config["ALERT_REPORTS_CSV_REQUEST_TIMEOUT"], + ) elapsed_seconds = (datetime.utcnow() - start_time).total_seconds() logger.info( "DataFrame generation from %s as user %s took %.2fs - execution_id: %s", diff --git a/superset/config.py b/superset/config.py index 2a44424afc6a..74412109c16f 100644 --- a/superset/config.py +++ b/superset/config.py @@ -1153,6 +1153,12 @@ class D3TimeFormat(TypedDict, total=False): # Time before selenium times out after waiting for all DOM class elements named # "loading" are gone. SCREENSHOT_LOAD_WAIT = int(timedelta(minutes=1).total_seconds()) +# Maximum time (in seconds) selenium waits for an initial page navigation +# (driver.get) to complete. Without it the navigation blocks indefinitely when +# the target page never finishes loading (e.g. an unreachable WEBDRIVER_BASEURL), +# which leaves the report schedule stuck in the WORKING state. Set to None to +# disable (not recommended). +SCREENSHOT_PAGE_LOAD_WAIT = int(timedelta(minutes=2).total_seconds()) # Selenium destroy retries SCREENSHOT_SELENIUM_RETRIES = 5 # Give selenium an headstart, in seconds @@ -1736,6 +1742,11 @@ def allowed_schemas_for_csv_upload( # pylint: disable=unused-argument # If True creates a default SSL context with ssl.Purpose.CLIENT_AUTH using the # default system root CA certificates. SMTP_SSL_SERVER_AUTH = False +# Socket timeout (in seconds) for the SMTP connection used when sending +# alert/report emails. Without a timeout the underlying socket blocks +# indefinitely if the SMTP server becomes unreachable, which leaves report +# schedules stuck in the WORKING state. Set to None to disable (not recommended). +SMTP_TIMEOUT = 30 ENABLE_CHUNK_ENCODING = False # Whether to bump the logging level to ERROR on the flask_appbuilder package @@ -2084,6 +2095,12 @@ def EMAIL_HEADER_MUTATOR( # pylint: disable=invalid-name,unused-argument # noq # Max tries to run queries to prevent false errors caused by transient errors # being returned to users. Set to a value >1 to enable retries. ALERT_REPORTS_QUERY_EXECUTION_MAX_TRIES = 1 +# Socket timeout (in seconds) for the HTTP request that fetches chart data when +# generating CSV/dataframe report attachments. Without a timeout the request +# blocks indefinitely if the Superset webserver is unreachable from the worker, +# which leaves the report schedule stuck in the WORKING state. Set to None to +# disable (not recommended). +ALERT_REPORTS_CSV_REQUEST_TIMEOUT = 60 # Custom width for screenshots ALERT_REPORTS_MIN_CUSTOM_SCREENSHOT_WIDTH = 600 ALERT_REPORTS_MAX_CUSTOM_SCREENSHOT_WIDTH = 2400 @@ -2128,6 +2145,12 @@ def EMAIL_HEADER_MUTATOR( # pylint: disable=invalid-name,unused-argument # noq # For workspaces with 10k+ channels, consider increasing to 10 SLACK_API_RATE_LIMIT_RETRY_COUNT = 2 +# Timeout (in seconds) for outbound Slack API calls. The Slack SDK defaults to 30s; +# exposing it here lets operators grant more time for large file uploads (multi-MB +# CSVs, PDFs, screenshot sets) to congested or rate-limited Slack endpoints without +# patching code, consistent with the SMTP/CSV/screenshot timeouts. +SLACK_API_TIMEOUT = 30 + # The webdriver to use for generating reports when using Selenium (not Playwright). # This setting is ignored when PLAYWRIGHT_REPORTS_AND_THUMBNAILS is enabled, as # Playwright always uses Chromium regardless of this value. diff --git a/superset/utils/core.py b/superset/utils/core.py index f49a874f2e70..14d6d9c1be8b 100644 --- a/superset/utils/core.py +++ b/superset/utils/core.py @@ -938,6 +938,11 @@ def send_mime_email( smtp_starttls = config["SMTP_STARTTLS"] smtp_ssl = config["SMTP_SSL"] smtp_ssl_server_auth = config["SMTP_SSL_SERVER_AUTH"] + # A missing timeout means the socket blocks forever when the SMTP server is + # unreachable, wedging the report schedule in the WORKING state. Fall back to + # the key being absent for backwards compatibility with custom configs. + # Keep this fallback in sync with the SMTP_TIMEOUT default in config.py. + smtp_timeout = config.get("SMTP_TIMEOUT", 30) if dryrun: logger.info("Dryrun enabled, email notification content is below:") @@ -948,17 +953,27 @@ def send_mime_email( # root CA certificates ssl_context = ssl.create_default_context() if smtp_ssl_server_auth else None smtp = ( - smtplib.SMTP_SSL(smtp_host, smtp_port, context=ssl_context) + smtplib.SMTP_SSL( + smtp_host, smtp_port, context=ssl_context, timeout=smtp_timeout + ) if smtp_ssl - else smtplib.SMTP(smtp_host, smtp_port) + else smtplib.SMTP(smtp_host, smtp_port, timeout=smtp_timeout) ) - if smtp_starttls: - smtp.starttls(context=ssl_context) - if smtp_user and smtp_password: - smtp.login(smtp_user, smtp_password) - logger.debug("Sent an email to %s", str(e_to)) - smtp.sendmail(e_from, e_to, mime_msg.as_string()) - smtp.quit() + try: + if smtp_starttls: + smtp.starttls(context=ssl_context) + if smtp_user and smtp_password: + smtp.login(smtp_user, smtp_password) + logger.debug("Sent an email to %s", str(e_to)) + smtp.sendmail(e_from, e_to, mime_msg.as_string()) + finally: + # Always release the socket; the new timeout means starttls/login/ + # sendmail can raise, and a skipped quit() would leak connections in + # the long-lived worker process. + try: + smtp.quit() + except smtplib.SMTPException: + pass def recipients_string_to_list(address_string: str | None) -> list[str]: diff --git a/superset/utils/csv.py b/superset/utils/csv.py index f7deded9bfff..5c6decdc76a3 100644 --- a/superset/utils/csv.py +++ b/superset/utils/csv.py @@ -90,14 +90,18 @@ def escape_values(v: Any) -> Union[str, Any]: def get_chart_csv_data( - chart_url: str, auth_cookies: Optional[dict[str, str]] = None + chart_url: str, + auth_cookies: Optional[dict[str, str]] = None, + timeout: Optional[float] = None, ) -> Optional[bytes]: content = None if auth_cookies: opener = urllib.request.build_opener() cookie_str = ";".join([f"{key}={val}" for key, val in auth_cookies.items()]) opener.addheaders.append(("Cookie", cookie_str)) - response = opener.open(chart_url) + # A missing timeout means the socket blocks forever when the Superset + # webserver is unreachable, wedging the report schedule in WORKING. + response = opener.open(chart_url, timeout=timeout) content = response.read() if response.getcode() != 200: raise URLError(response.getcode()) @@ -107,11 +111,13 @@ def get_chart_csv_data( def get_chart_dataframe( - chart_url: str, auth_cookies: Optional[dict[str, str]] = None + chart_url: str, + auth_cookies: Optional[dict[str, str]] = None, + timeout: Optional[float] = None, ) -> Optional[pd.DataFrame]: # Disable all the unnecessary-lambda violations in this function # pylint: disable=unnecessary-lambda - content = get_chart_csv_data(chart_url, auth_cookies) + content = get_chart_csv_data(chart_url, auth_cookies, timeout) if content is None: return None diff --git a/superset/utils/slack.py b/superset/utils/slack.py index ce3d214af512..013cadc4020f 100644 --- a/superset/utils/slack.py +++ b/superset/utils/slack.py @@ -48,7 +48,11 @@ def get_slack_client() -> WebClient: token: str = app.config["SLACK_API_TOKEN"] if callable(token): token = token() - client = WebClient(token=token, proxy=app.config["SLACK_PROXY"]) + client = WebClient( + token=token, + proxy=app.config["SLACK_PROXY"], + timeout=app.config["SLACK_API_TIMEOUT"], + ) max_retry_count = app.config.get("SLACK_API_RATE_LIMIT_RETRY_COUNT", 2) rate_limit_handler = RateLimitErrorRetryHandler(max_retry_count=max_retry_count) diff --git a/superset/utils/webdriver.py b/superset/utils/webdriver.py index 23f30828873c..4b92bef2411c 100644 --- a/superset/utils/webdriver.py +++ b/superset/utils/webdriver.py @@ -461,9 +461,23 @@ def driver(self) -> WebDriver: self._driver = self._create() if not self._driver: raise RuntimeError("WebDriver creation failed") - self._driver.set_window_size(*self._window) - if self._user: - self._auth(self._user) + try: + self._driver.set_window_size(*self._window) + # Bound driver.get() so an unreachable page raises a + # TimeoutException instead of blocking the worker (and the + # report schedule) forever. + page_load_wait = app.config["SCREENSHOT_PAGE_LOAD_WAIT"] + if page_load_wait is not None: + self._driver.set_page_load_timeout(page_load_wait) + if self._user: + self._auth(self._user) + except Exception: + # A failure mid-setup (e.g. the new page-load timeout or auth + # raising) would otherwise leave a partially initialized, + # unauthenticated driver cached for reuse. Tear it down so the + # next access recreates it cleanly. + self._destroy() + raise return self._driver def _create_firefox_driver( diff --git a/tests/integration_tests/email_tests.py b/tests/integration_tests/email_tests.py index 16a7becc1bed..9134e66990ab 100644 --- a/tests/integration_tests/email_tests.py +++ b/tests/integration_tests/email_tests.py @@ -193,7 +193,9 @@ def test_send_mime(self, mock_smtp, mock_smtp_ssl): msg = MIMEMultipart() utils.send_mime_email("from", "to", msg, current_app.config, dryrun=False) mock_smtp.assert_called_with( - current_app.config["SMTP_HOST"], current_app.config["SMTP_PORT"] + current_app.config["SMTP_HOST"], + current_app.config["SMTP_PORT"], + timeout=current_app.config["SMTP_TIMEOUT"], ) assert mock_smtp.return_value.starttls.called mock_smtp.return_value.login.assert_called_with( @@ -218,6 +220,7 @@ def test_send_mime_ssl(self, mock_smtp, mock_smtp_ssl): current_app.config["SMTP_HOST"], current_app.config["SMTP_PORT"], context=None, + timeout=current_app.config["SMTP_TIMEOUT"], ) @mock.patch("smtplib.SMTP_SSL") @@ -235,6 +238,7 @@ def test_send_mime_ssl_server_auth(self, mock_smtp, mock_smtp_ssl): current_app.config["SMTP_HOST"], current_app.config["SMTP_PORT"], context=mock.ANY, + timeout=current_app.config["SMTP_TIMEOUT"], ) called_context = mock_smtp_ssl.call_args.kwargs["context"] assert called_context.verify_mode == ssl.CERT_REQUIRED @@ -266,7 +270,9 @@ def test_send_mime_noauth(self, mock_smtp, mock_smtp_ssl): ) assert not mock_smtp_ssl.called mock_smtp.assert_called_with( - current_app.config["SMTP_HOST"], current_app.config["SMTP_PORT"] + current_app.config["SMTP_HOST"], + current_app.config["SMTP_PORT"], + timeout=current_app.config["SMTP_TIMEOUT"], ) assert not mock_smtp.login.called current_app.config["SMTP_USER"] = smtp_user @@ -281,6 +287,36 @@ def test_send_mime_dryrun(self, mock_smtp, mock_smtp_ssl): assert not mock_smtp.called assert not mock_smtp_ssl.called + @mock.patch("smtplib.SMTP_SSL") + @mock.patch("smtplib.SMTP") + def test_send_mime_respects_custom_timeout( + self, mock_smtp: mock.Mock, mock_smtp_ssl: mock.Mock + ) -> None: + """A configured SMTP_TIMEOUT must reach the smtplib client. + + A missing timeout would block the worker forever when the SMTP server + is unreachable, wedging the report schedule in WORKING (issue #40047). + """ + config = {**current_app.config, "SMTP_TIMEOUT": 7, "SMTP_SSL": False} + mock_smtp.return_value = mock.Mock() + utils.send_mime_email("from", ["to"], MIMEMultipart(), config, dryrun=False) + assert mock_smtp.call_args.kwargs["timeout"] == 7 + + @mock.patch("smtplib.SMTP_SSL") + @mock.patch("smtplib.SMTP") + def test_send_mime_timeout_defaults_when_unset( + self, mock_smtp: mock.Mock, mock_smtp_ssl: mock.Mock + ) -> None: + """An absent SMTP_TIMEOUT key falls back to the 30s default. + + Custom configs predating SMTP_TIMEOUT must still get a finite timeout. + """ + config = {k: v for k, v in current_app.config.items() if k != "SMTP_TIMEOUT"} + config["SMTP_SSL"] = False + mock_smtp.return_value = mock.Mock() + utils.send_mime_email("from", ["to"], MIMEMultipart(), config, dryrun=False) + assert mock_smtp.call_args.kwargs["timeout"] == 30 + if __name__ == "__main__": unittest.main() diff --git a/tests/integration_tests/reports/commands_tests.py b/tests/integration_tests/reports/commands_tests.py index ae1c855d85ae..0a5b8bf1a10f 100644 --- a/tests/integration_tests/reports/commands_tests.py +++ b/tests/integration_tests/reports/commands_tests.py @@ -2004,7 +2004,11 @@ def test_slack_token_callable_chart_report( TEST_ID, create_report_slack_chart.id, datetime.utcnow() ).run() slack_token_mock.assert_called() - slack_client_mock_class.assert_called_with(token="cool_code", proxy=None) # noqa: S106 + slack_client_mock_class.assert_called_with( + token="cool_code", # noqa: S106 + proxy=None, + timeout=30, + ) assert_log(ReportState.SUCCESS) diff --git a/tests/unit_tests/utils/csv_tests.py b/tests/unit_tests/utils/csv_tests.py index ee2ae3ab356f..17c600608755 100644 --- a/tests/unit_tests/utils/csv_tests.py +++ b/tests/unit_tests/utils/csv_tests.py @@ -16,6 +16,9 @@ # under the License. +from typing import Any +from unittest import mock + import pandas as pd import pyarrow as pa import pytest # noqa: F401 @@ -25,6 +28,7 @@ from superset.utils.core import GenericDataType from superset.utils.csv import ( df_to_escaped_csv, + get_chart_csv_data, get_chart_dataframe, ) @@ -75,20 +79,33 @@ def test_escape_value(): assert result == "'\rfoo" -def fake_get_chart_csv_data_none(chart_url, auth_cookies=None): +def fake_get_chart_csv_data_none( + chart_url: str, + auth_cookies: dict[str, str] | None = None, + timeout: float | None = None, +) -> bytes | None: + """Return ``None`` to mock a fetch that yields no payload.""" return None -def fake_get_chart_csv_data_empty(chart_url, auth_cookies=None): - # Return JSON with empty data so that the resulting DataFrame is empty - fake_result = { +def fake_get_chart_csv_data_empty( + chart_url: str, + auth_cookies: dict[str, str] | None = None, + timeout: float | None = None, +) -> bytes | None: + """Return an encoded empty-result payload for dataframe-empty scenarios.""" + fake_result: dict[str, Any] = { "result": [{"data": {}, "coltypes": [], "colnames": [], "indexnames": []}] } return json.dumps(fake_result).encode("utf-8") -def fake_get_chart_csv_data_valid(chart_url, auth_cookies=None): - # Return JSON with non-temporal data and valid indexnames so that they are used. +def fake_get_chart_csv_data_valid( + chart_url: str, + auth_cookies: dict[str, str] | None = None, + timeout: float | None = None, +) -> bytes | None: + """Return a non-temporal payload used to verify dataframe construction.""" fake_result = { "result": [ { @@ -103,7 +120,11 @@ def fake_get_chart_csv_data_valid(chart_url, auth_cookies=None): return json.dumps(fake_result).encode("utf-8") -def fake_get_chart_csv_data_temporal(chart_url, auth_cookies=None): +def fake_get_chart_csv_data_temporal( + chart_url: str, + auth_cookies: dict[str, str] | None = None, + timeout: float | None = None, +) -> bytes | None: """ Return JSON with a temporal column and valid indexnames so that a MultiIndex is built. @@ -122,8 +143,12 @@ def fake_get_chart_csv_data_temporal(chart_url, auth_cookies=None): return json.dumps(fake_result).encode("utf-8") -def fake_get_chart_csv_data_hierarchical(chart_url, auth_cookies=None): - # Return JSON with hierarchical column (list-based) and matching index names. +def fake_get_chart_csv_data_hierarchical( + chart_url: str, + auth_cookies: dict[str, str] | None = None, + timeout: float | None = None, +) -> bytes | None: + """Return hierarchical-column mock data for MultiIndex assertions.""" fake_result = { "result": [ { @@ -138,7 +163,11 @@ def fake_get_chart_csv_data_hierarchical(chart_url, auth_cookies=None): return json.dumps(fake_result).encode("utf-8") -def fake_get_chart_csv_data_with_na_values(chart_url, auth_cookies=None): +def fake_get_chart_csv_data_with_na_values( + chart_url: str, + auth_cookies: dict[str, str] | None = None, + timeout: float | None = None, +) -> bytes | None: # Return JSON with data containing "NA" string value that will be treated as null fake_result = { "result": [ @@ -380,3 +409,41 @@ def test_get_chart_dataframe_preserves_na_string_values( last_name_values = df[("last_name",)].values assert last_name_values[0] == "Smith" assert last_name_values[1] == "NA" + + +def test_get_chart_csv_data_passes_timeout_to_opener( + monkeypatch: pytest.MonkeyPatch, +) -> None: + """The timeout argument must reach the urllib opener's open() call.""" + # Without a timeout the request blocks forever when the webserver is + # unreachable, wedging the report schedule in WORKING (issue #40047). + mock_response = mock.Mock() + mock_response.read.return_value = b"data" + mock_response.getcode.return_value = 200 + mock_opener = mock.Mock() + mock_opener.open.return_value = mock_response + mock_opener.addheaders = [] + monkeypatch.setattr( + "urllib.request.build_opener", mock.Mock(return_value=mock_opener) + ) + + get_chart_csv_data("http://dummy-url", auth_cookies={"session": "x"}, timeout=42) + + mock_opener.open.assert_called_once_with("http://dummy-url", timeout=42) + + +def test_get_chart_dataframe_forwards_timeout(monkeypatch: pytest.MonkeyPatch) -> None: + """get_chart_dataframe must forward its timeout down to get_chart_csv_data.""" + captured: dict[str, float | None] = {} + + def fake( + chart_url: str, + auth_cookies: dict[str, str] | None = None, + timeout: float | None = None, + ) -> bytes | None: + captured["timeout"] = timeout + return None + + monkeypatch.setattr(csv, "get_chart_csv_data", fake) + get_chart_dataframe("http://dummy-url", timeout=99) + assert captured["timeout"] == 99 diff --git a/tests/unit_tests/utils/webdriver_test.py b/tests/unit_tests/utils/webdriver_test.py index c5ad1a1a14a0..06a84ef967c3 100644 --- a/tests/unit_tests/utils/webdriver_test.py +++ b/tests/unit_tests/utils/webdriver_test.py @@ -273,6 +273,36 @@ def test_empty_webdriver_config(self, mock_chrome, mock_app_patch, mock_app): # Should create driver without errors mock_driver_class.assert_called_once() + @patch("superset.utils.webdriver.app") + def test_driver_sets_page_load_timeout(self, mock_app_patch: MagicMock) -> None: + """driver.get() must be bounded so it can't block forever (#40047).""" + mock_app_patch.config = { + "SCREENSHOT_LOCATE_WAIT": 10, + "SCREENSHOT_LOAD_WAIT": 10, + "SCREENSHOT_PAGE_LOAD_WAIT": 120, + } + mock_driver = MagicMock() + driver = WebDriverSelenium(driver_type="chrome", window=(800, 600)) + with patch.object(driver, "_create", return_value=mock_driver): + assert driver.driver is mock_driver + mock_driver.set_page_load_timeout.assert_called_once_with(120) + + @patch("superset.utils.webdriver.app") + def test_driver_skips_page_load_timeout_when_none( + self, mock_app_patch: MagicMock + ) -> None: + """Setting SCREENSHOT_PAGE_LOAD_WAIT to None disables the bound.""" + mock_app_patch.config = { + "SCREENSHOT_LOCATE_WAIT": 10, + "SCREENSHOT_LOAD_WAIT": 10, + "SCREENSHOT_PAGE_LOAD_WAIT": None, + } + mock_driver = MagicMock() + driver = WebDriverSelenium(driver_type="chrome", window=(800, 600)) + with patch.object(driver, "_create", return_value=mock_driver): + assert driver.driver is mock_driver + mock_driver.set_page_load_timeout.assert_not_called() + class TestPlaywrightAvailabilityCheck: """Test comprehensive Playwright availability checking."""