Skip to content

Commit f0d20e7

Browse files
Snow 2713526 python richer client environment signals additional requirements (snowflakedb#2651)
1 parent 1680c6c commit f0d20e7

File tree

4 files changed

+328
-80
lines changed

4 files changed

+328
-80
lines changed

DESCRIPTION.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,8 @@ Source code is also available at: https://github.com/snowflakedb/snowflake-conne
1515
- Added no_proxy parameter for proxy configuration without using environmental variables.
1616
- Added OAUTH_AUTHORIZATION_CODE and OAUTH_CLIENT_CREDENTIALS to list of authenticators that don't require user to be set
1717
- Added `oauth_socket_uri` connection parameter, which allows separate server and redirect URIs for the local OAuth server.
18-
- Fixed FIPS environments md5 hash isues with multipart upload on Azure.
18+
- Made platform_detection logs silent and improved its timeout handling. Added support for the `SNOWFLAKE_DISABLE_PLATFORM_DETECTION` environment variable, which disables platform detection when set to `true` (case-insensitive).
19+
- Fixed FIPS environments md5 hash issues with multipart upload on Azure.
1920

2021
- v4.0.0(October 09,2025)
2122
- Added support for checking certificates revocation using revocation lists (CRLs)

src/snowflake/connector/constants.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -441,6 +441,10 @@ class IterUnit(Enum):
441441
# TODO: all env variables definitions should be here
442442
ENV_VAR_PARTNER = "SF_PARTNER"
443443
ENV_VAR_TEST_MODE = "SNOWFLAKE_TEST_MODE"
444+
ENV_VAR_DISABLE_PLATFORM_DETECTION = "SNOWFLAKE_DISABLE_PLATFORM_DETECTION"
445+
446+
# Boolean positive values (lowercased) for environment variable checks
447+
ENV_VAR_BOOL_POSITIVE_VALUES_LOWERCASED = ["true"]
444448

445449
_DOMAIN_NAME_MAP = {_DEFAULT_HOSTNAME_TLD: "GLOBAL", _CHINA_HOSTNAME_TLD: "CHINA"}
446450

src/snowflake/connector/platform_detection.py

Lines changed: 145 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,17 @@
33
import logging
44
import os
55
import re
6+
from concurrent.futures import CancelledError as FutureCancelledError
7+
from concurrent.futures import TimeoutError as FutureTimeoutError
68
from concurrent.futures.thread import ThreadPoolExecutor
9+
from contextlib import contextmanager
710
from enum import Enum
811
from functools import cache
912

13+
from .constants import (
14+
ENV_VAR_BOOL_POSITIVE_VALUES_LOWERCASED,
15+
ENV_VAR_DISABLE_PLATFORM_DETECTION,
16+
)
1017
from .options import boto3, botocore, installed_boto
1118

1219
if installed_boto:
@@ -18,13 +25,48 @@
1825

1926
logger = logging.getLogger(__name__)
2027

28+
# Loggers to suppress during platform detection to avoid noise in customer logs
29+
_LOGGERS_TO_SUPPRESS = [
30+
"snowflake.connector.vendored.urllib3.connectionpool",
31+
"botocore.utils",
32+
"botocore.httpsession",
33+
"urllib3.connectionpool",
34+
]
35+
36+
37+
@contextmanager
38+
def _suppress_platform_detection_logs():
39+
"""
40+
Context manager to temporarily suppress all logs from underlying HTTP libraries during platform detection.
41+
42+
This prevents noisy DEBUG logs and stack traces from urllib3 and botocore when detecting
43+
cloud platforms, which can confuse customers (SNOW-2204396). Our own debug logs are not affected.
44+
"""
45+
original_levels = {}
46+
try:
47+
# Completely suppress all logs from noisy libraries
48+
for logger_name in _LOGGERS_TO_SUPPRESS:
49+
lib_logger = logging.getLogger(logger_name)
50+
original_levels[logger_name] = lib_logger.level
51+
lib_logger.setLevel(logging.CRITICAL + 1) # Above CRITICAL = no logs at all
52+
yield
53+
finally:
54+
# Restore original log levels
55+
for logger_name, level in original_levels.items():
56+
logging.getLogger(logger_name).setLevel(level)
57+
2158

2259
class _DetectionState(Enum):
2360
"""Internal enum to represent the detection state of a platform."""
2461

2562
DETECTED = "detected"
2663
NOT_DETECTED = "not_detected"
27-
TIMEOUT = "timeout"
64+
HTTP_TIMEOUT = "timeout"
65+
WORKER_TIMEOUT = "worker_timeout"
66+
67+
68+
# Result returned when platform detection is disabled via environment variable
69+
_PLATFORM_DETECTION_DISABLED_RESULT = ["disabled"]
2870

2971

3072
def is_ec2_instance(platform_detection_timeout_seconds: float):
@@ -147,7 +189,7 @@ def is_azure_vm(
147189
session_manager: SessionManager instance for making HTTP requests.
148190
149191
Returns:
150-
_DetectionState: DETECTED if on Azure VM, TIMEOUT if request times out,
192+
_DetectionState: DETECTED if on Azure VM, HTTP_TIMEOUT if request times out,
151193
NOT_DETECTED otherwise.
152194
"""
153195
try:
@@ -162,7 +204,7 @@ def is_azure_vm(
162204
else _DetectionState.NOT_DETECTED
163205
)
164206
except Timeout:
165-
return _DetectionState.TIMEOUT
207+
return _DetectionState.HTTP_TIMEOUT
166208
except RequestException:
167209
return _DetectionState.NOT_DETECTED
168210

@@ -209,7 +251,7 @@ def is_managed_identity_available_on_azure_vm(
209251
resource: The Azure resource URI to request a token for.
210252
211253
Returns:
212-
_DetectionState: DETECTED if managed identity is available, TIMEOUT if request
254+
_DetectionState: DETECTED if managed identity is available, HTTP_TIMEOUT if request
213255
times out, NOT_DETECTED otherwise.
214256
"""
215257
endpoint = f"http://169.254.169.254/metadata/identity/oauth2/token?api-version=2018-02-01&resource={resource}"
@@ -224,7 +266,7 @@ def is_managed_identity_available_on_azure_vm(
224266
else _DetectionState.NOT_DETECTED
225267
)
226268
except Timeout:
227-
return _DetectionState.TIMEOUT
269+
return _DetectionState.HTTP_TIMEOUT
228270
except RequestException:
229271
return _DetectionState.NOT_DETECTED
230272

@@ -251,7 +293,7 @@ def has_azure_managed_identity(
251293
session_manager: SessionManager instance for making HTTP requests.
252294
253295
Returns:
254-
_DetectionState: DETECTED if managed identity is available, TIMEOUT if
296+
_DetectionState: DETECTED if managed identity is available, HTTP_TIMEOUT if
255297
detection timed out, NOT_DETECTED otherwise.
256298
"""
257299
# short circuit early to save on latency and avoid minting an unnecessary token
@@ -280,7 +322,7 @@ def is_gce_vm(
280322
session_manager: SessionManager instance for making HTTP requests.
281323
282324
Returns:
283-
_DetectionState: DETECTED if on GCE, TIMEOUT if request times out,
325+
_DetectionState: DETECTED if on GCE, HTTP_TIMEOUT if request times out,
284326
NOT_DETECTED otherwise.
285327
"""
286328
try:
@@ -294,7 +336,7 @@ def is_gce_vm(
294336
else _DetectionState.NOT_DETECTED
295337
)
296338
except Timeout:
297-
return _DetectionState.TIMEOUT
339+
return _DetectionState.HTTP_TIMEOUT
298340
except RequestException:
299341
return _DetectionState.NOT_DETECTED
300342

@@ -350,7 +392,7 @@ def has_gcp_identity(
350392
platform_detection_timeout_seconds: Timeout value for the metadata service request.
351393
session_manager: SessionManager instance for making HTTP requests.
352394
Returns:
353-
_DetectionState: DETECTED if valid GCP identity exists, TIMEOUT if request
395+
_DetectionState: DETECTED if valid GCP identity exists, HTTP_TIMEOUT if request
354396
times out, NOT_DETECTED otherwise.
355397
"""
356398
try:
@@ -365,7 +407,7 @@ def has_gcp_identity(
365407
else _DetectionState.NOT_DETECTED
366408
)
367409
except Timeout:
368-
return _DetectionState.TIMEOUT
410+
return _DetectionState.HTTP_TIMEOUT
369411
except RequestException:
370412
return _DetectionState.NOT_DETECTED
371413

@@ -402,11 +444,24 @@ def detect_platforms(
402444
session_manager: SessionManager instance for making HTTP requests. If None, a new instance will be created.
403445
404446
Returns:
405-
list[str]: List of detected platform names. Platforms that timed out will have
406-
"_timeout" suffix appended to their name. Returns empty list if any
407-
exception occurs during detection.
447+
list[str]: List of detected platform names. Platforms that timed out (either HTTP timeout
448+
or thread timeout) will have "_timeout" suffix appended to their name.
449+
Returns _PLATFORM_DETECTION_DISABLED_RESULT if the ENV_VAR_DISABLE_PLATFORM_DETECTION
450+
environment variable is set to a value in ENV_VAR_BOOL_POSITIVE_VALUES_LOWERCASED
451+
(case-insensitive). Returns empty list if any exception occurs during detection.
408452
"""
409453
try:
454+
# Check if platform detection is disabled via environment variable
455+
if (
456+
os.environ.get(ENV_VAR_DISABLE_PLATFORM_DETECTION, "").lower()
457+
in ENV_VAR_BOOL_POSITIVE_VALUES_LOWERCASED
458+
):
459+
logger.debug(
460+
"Platform detection disabled via %s environment variable",
461+
ENV_VAR_DISABLE_PLATFORM_DETECTION,
462+
)
463+
return _PLATFORM_DETECTION_DISABLED_RESULT
464+
410465
if platform_detection_timeout_seconds is None:
411466
platform_detection_timeout_seconds = 0.2
412467

@@ -419,54 +474,82 @@ def detect_platforms(
419474
use_pooling=False, max_retries=0
420475
)
421476

422-
# Run environment-only checks synchronously (no network calls, no threading overhead)
423-
platforms = {
424-
"is_aws_lambda": is_aws_lambda(),
425-
"is_azure_function": is_azure_function(),
426-
"is_gce_cloud_run_service": is_gcp_cloud_run_service(),
427-
"is_gce_cloud_run_job": is_gcp_cloud_run_job(),
428-
"is_github_action": is_github_action(),
429-
}
430-
431-
# Run network-calling functions in parallel
432-
if platform_detection_timeout_seconds != 0.0:
433-
with ThreadPoolExecutor(max_workers=6) as executor:
434-
futures = {
435-
"is_ec2_instance": executor.submit(
436-
is_ec2_instance, platform_detection_timeout_seconds
437-
),
438-
"has_aws_identity": executor.submit(
439-
has_aws_identity, platform_detection_timeout_seconds
440-
),
441-
"is_azure_vm": executor.submit(
442-
is_azure_vm, platform_detection_timeout_seconds, session_manager
443-
),
444-
"has_azure_managed_identity": executor.submit(
445-
has_azure_managed_identity,
446-
platform_detection_timeout_seconds,
447-
session_manager,
448-
),
449-
"is_gce_vm": executor.submit(
450-
is_gce_vm, platform_detection_timeout_seconds, session_manager
451-
),
452-
"has_gcp_identity": executor.submit(
453-
has_gcp_identity,
454-
platform_detection_timeout_seconds,
455-
session_manager,
456-
),
457-
}
458-
459-
platforms.update(
460-
{key: future.result() for key, future in futures.items()}
461-
)
462-
463-
detected_platforms = []
464-
for platform_name, detection_state in platforms.items():
465-
if detection_state == _DetectionState.DETECTED:
466-
detected_platforms.append(platform_name)
467-
elif detection_state == _DetectionState.TIMEOUT:
468-
detected_platforms.append(f"{platform_name}_timeout")
469-
470-
return detected_platforms
477+
# HTTP timeout should be slightly shorter than thread timeout to allow HTTP-level
478+
# timeouts to occur before thread executor times out. This helps distinguish between
479+
# HTTP_TIMEOUT (network issue) and WORKER_TIMEOUT (thread stuck/hung).
480+
http_timeout_epsilon = 0.05 # 5% shorter
481+
http_timeout = platform_detection_timeout_seconds * (1 - http_timeout_epsilon)
482+
threads_timeout = platform_detection_timeout_seconds
483+
484+
# Suppress noisy logs from underlying HTTP libraries during platform detection
485+
with _suppress_platform_detection_logs():
486+
# Run environment-only checks synchronously (no network calls, no threading overhead)
487+
platforms = {
488+
"is_aws_lambda": is_aws_lambda(),
489+
"is_azure_function": is_azure_function(),
490+
"is_gce_cloud_run_service": is_gcp_cloud_run_service(),
491+
"is_gce_cloud_run_job": is_gcp_cloud_run_job(),
492+
"is_github_action": is_github_action(),
493+
}
494+
495+
# Run network-calling functions in parallel
496+
if platform_detection_timeout_seconds != 0.0:
497+
with ThreadPoolExecutor(max_workers=6) as executor:
498+
futures = {
499+
"is_ec2_instance": executor.submit(
500+
is_ec2_instance, http_timeout
501+
),
502+
"has_aws_identity": executor.submit(
503+
has_aws_identity, http_timeout
504+
),
505+
"is_azure_vm": executor.submit(
506+
is_azure_vm,
507+
http_timeout,
508+
session_manager,
509+
),
510+
"has_azure_managed_identity": executor.submit(
511+
has_azure_managed_identity,
512+
http_timeout,
513+
session_manager,
514+
),
515+
"is_gce_vm": executor.submit(
516+
is_gce_vm,
517+
http_timeout,
518+
session_manager,
519+
),
520+
"has_gcp_identity": executor.submit(
521+
has_gcp_identity,
522+
http_timeout,
523+
session_manager,
524+
),
525+
}
526+
527+
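# Wait on each future with threads_timeout. NOTE(review): the timeout applies per result() call,
528+
# so waits can add up across futures, and leaving the executor's with-block still waits for running workers.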
529+
for key, future in futures.items():
530+
try:
531+
platforms[key] = future.result(timeout=threads_timeout)
532+
except (FutureTimeoutError, FutureCancelledError):
533+
# Thread/future timed out at executor level
534+
platforms[key] = _DetectionState.WORKER_TIMEOUT
535+
except Exception:
536+
# Any other error from the thread
537+
platforms[key] = _DetectionState.NOT_DETECTED
538+
539+
detected_platforms = []
540+
for platform_name, detection_state in platforms.items():
541+
if detection_state == _DetectionState.DETECTED:
542+
detected_platforms.append(platform_name)
543+
elif detection_state in (
544+
_DetectionState.HTTP_TIMEOUT,
545+
_DetectionState.WORKER_TIMEOUT,
546+
):
547+
detected_platforms.append(f"{platform_name}_timeout")
548+
549+
logger.debug(
550+
"Platform detection completed. Detected platforms: %s",
551+
detected_platforms,
552+
)
553+
return detected_platforms
471554
except Exception:
472555
return []

0 commit comments

Comments
 (0)