33import logging
44import os
55import re
6+ from concurrent .futures import CancelledError as FutureCancelledError
7+ from concurrent .futures import TimeoutError as FutureTimeoutError
68from concurrent .futures .thread import ThreadPoolExecutor
9+ from contextlib import contextmanager
710from enum import Enum
811from functools import cache
912
13+ from .constants import (
14+ ENV_VAR_BOOL_POSITIVE_VALUES_LOWERCASED ,
15+ ENV_VAR_DISABLE_PLATFORM_DETECTION ,
16+ )
1017from .options import boto3 , botocore , installed_boto
1118
1219if installed_boto :
1825
1926logger = logging .getLogger (__name__ )
2027
# Loggers to suppress during platform detection to avoid noise in customer logs.
# _suppress_platform_detection_logs() raises the level of each logger named here
# above CRITICAL while detection runs and puts the previous level back afterwards.
_LOGGERS_TO_SUPPRESS = [
    "snowflake.connector.vendored.urllib3.connectionpool",
    "botocore.utils",
    "botocore.httpsession",
    "urllib3.connectionpool",
]
35+
36+
@contextmanager
def _suppress_platform_detection_logs():
    """
    Silence the HTTP-library loggers listed in _LOGGERS_TO_SUPPRESS for the duration of the block.

    On entry, each listed logger's current level is remembered and then raised to
    ``logging.CRITICAL + 1`` so that none of the standard-level records it emits pass
    its level check. On exit (normal or via an exception) the remembered levels are
    written back. Only the loggers named in _LOGGERS_TO_SUPPRESS are touched, so this
    module's own logger keeps working. This keeps urllib3/botocore DEBUG output and
    stack traces produced while probing cloud metadata endpoints out of customer
    logs (SNOW-2204396).

    NOTE(review): levels are saved and restored independently by every call. If two
    contexts overlap (e.g. concurrent callers), the later one records the already
    raised level and writes it back on exit, leaving the loggers suppressed.
    """
    # One step above CRITICAL: higher than every standard logging level.
    silenced_level = logging.CRITICAL + 1
    saved_levels = {}
    try:
        # Setup happens inside the try so that a failure half-way through still
        # restores the loggers that were already modified.
        for name in _LOGGERS_TO_SUPPRESS:
            noisy_logger = logging.getLogger(name)
            saved_levels[name] = noisy_logger.level
            noisy_logger.setLevel(silenced_level)
        yield
    finally:
        # Put every logger we touched back to the level it had on entry.
        for name, previous_level in saved_levels.items():
            logging.getLogger(name).setLevel(previous_level)
57+
2158
class _DetectionState(Enum):
    """Internal enum to represent the detection state of a platform.

    Each per-platform check reports one of these states. When detect_platforms()
    builds its result, DETECTED adds the platform name, both timeout states add
    the name with a "_timeout" suffix, and NOT_DETECTED adds nothing.
    """

    DETECTED = "detected"
    NOT_DETECTED = "not_detected"
    # Returned by the individual checks when the HTTP request raises Timeout.
    HTTP_TIMEOUT = "timeout"
    # Assigned by detect_platforms() when a worker future times out or is cancelled
    # at the executor level, as opposed to the HTTP request itself timing out.
    WORKER_TIMEOUT = "worker_timeout"
66+
67+
# Result returned when platform detection is disabled via environment variable.
# NOTE(review): detect_platforms() returns this list object itself rather than a
# copy, so all callers share one instance — treat it as read-only.
_PLATFORM_DETECTION_DISABLED_RESULT = ["disabled"]
2870
2971
3072def is_ec2_instance (platform_detection_timeout_seconds : float ):
@@ -147,7 +189,7 @@ def is_azure_vm(
147189 session_manager: SessionManager instance for making HTTP requests.
148190
149191 Returns:
150- _DetectionState: DETECTED if on Azure VM, TIMEOUT if request times out,
192+ _DetectionState: DETECTED if on Azure VM, HTTP_TIMEOUT if request times out,
151193 NOT_DETECTED otherwise.
152194 """
153195 try :
@@ -162,7 +204,7 @@ def is_azure_vm(
162204 else _DetectionState .NOT_DETECTED
163205 )
164206 except Timeout :
165- return _DetectionState .TIMEOUT
207+ return _DetectionState .HTTP_TIMEOUT
166208 except RequestException :
167209 return _DetectionState .NOT_DETECTED
168210
@@ -209,7 +251,7 @@ def is_managed_identity_available_on_azure_vm(
209251 resource: The Azure resource URI to request a token for.
210252
211253 Returns:
212- _DetectionState: DETECTED if managed identity is available, TIMEOUT if request
254+ _DetectionState: DETECTED if managed identity is available, HTTP_TIMEOUT if request
213255 times out, NOT_DETECTED otherwise.
214256 """
215257 endpoint = f"http://169.254.169.254/metadata/identity/oauth2/token?api-version=2018-02-01&resource={ resource } "
@@ -224,7 +266,7 @@ def is_managed_identity_available_on_azure_vm(
224266 else _DetectionState .NOT_DETECTED
225267 )
226268 except Timeout :
227- return _DetectionState .TIMEOUT
269+ return _DetectionState .HTTP_TIMEOUT
228270 except RequestException :
229271 return _DetectionState .NOT_DETECTED
230272
@@ -251,7 +293,7 @@ def has_azure_managed_identity(
251293 session_manager: SessionManager instance for making HTTP requests.
252294
253295 Returns:
254- _DetectionState: DETECTED if managed identity is available, TIMEOUT if
296+ _DetectionState: DETECTED if managed identity is available, HTTP_TIMEOUT if
255297 detection timed out, NOT_DETECTED otherwise.
256298 """
257299 # short circuit early to save on latency and avoid minting an unnecessary token
@@ -280,7 +322,7 @@ def is_gce_vm(
280322 session_manager: SessionManager instance for making HTTP requests.
281323
282324 Returns:
283- _DetectionState: DETECTED if on GCE, TIMEOUT if request times out,
325+ _DetectionState: DETECTED if on GCE, HTTP_TIMEOUT if request times out,
284326 NOT_DETECTED otherwise.
285327 """
286328 try :
@@ -294,7 +336,7 @@ def is_gce_vm(
294336 else _DetectionState .NOT_DETECTED
295337 )
296338 except Timeout :
297- return _DetectionState .TIMEOUT
339+ return _DetectionState .HTTP_TIMEOUT
298340 except RequestException :
299341 return _DetectionState .NOT_DETECTED
300342
@@ -350,7 +392,7 @@ def has_gcp_identity(
350392 platform_detection_timeout_seconds: Timeout value for the metadata service request.
351393 session_manager: SessionManager instance for making HTTP requests.
352394 Returns:
353- _DetectionState: DETECTED if valid GCP identity exists, TIMEOUT if request
395+ _DetectionState: DETECTED if valid GCP identity exists, HTTP_TIMEOUT if request
354396 times out, NOT_DETECTED otherwise.
355397 """
356398 try :
@@ -365,7 +407,7 @@ def has_gcp_identity(
365407 else _DetectionState .NOT_DETECTED
366408 )
367409 except Timeout :
368- return _DetectionState .TIMEOUT
410+ return _DetectionState .HTTP_TIMEOUT
369411 except RequestException :
370412 return _DetectionState .NOT_DETECTED
371413
@@ -402,11 +444,24 @@ def detect_platforms(
402444 session_manager: SessionManager instance for making HTTP requests. If None, a new instance will be created.
403445
404446 Returns:
405- list[str]: List of detected platform names. Platforms that timed out will have
406- "_timeout" suffix appended to their name. Returns empty list if any
407- exception occurs during detection.
447+ list[str]: List of detected platform names. Platforms that timed out (either HTTP timeout
448+ or thread timeout) will have "_timeout" suffix appended to their name.
449+ Returns _PLATFORM_DETECTION_DISABLED_RESULT if the ENV_VAR_DISABLE_PLATFORM_DETECTION
450+ environment variable is set to a value in ENV_VAR_BOOL_POSITIVE_VALUES_LOWERCASED
451+ (case-insensitive). Returns empty list if any exception occurs during detection.
408452 """
409453 try :
454+ # Check if platform detection is disabled via environment variable
455+ if (
456+ os .environ .get (ENV_VAR_DISABLE_PLATFORM_DETECTION , "" ).lower ()
457+ in ENV_VAR_BOOL_POSITIVE_VALUES_LOWERCASED
458+ ):
459+ logger .debug (
460+ "Platform detection disabled via %s environment variable" ,
461+ ENV_VAR_DISABLE_PLATFORM_DETECTION ,
462+ )
463+ return _PLATFORM_DETECTION_DISABLED_RESULT
464+
410465 if platform_detection_timeout_seconds is None :
411466 platform_detection_timeout_seconds = 0.2
412467
@@ -419,54 +474,82 @@ def detect_platforms(
419474 use_pooling = False , max_retries = 0
420475 )
421476
422- # Run environment-only checks synchronously (no network calls, no threading overhead)
423- platforms = {
424- "is_aws_lambda" : is_aws_lambda (),
425- "is_azure_function" : is_azure_function (),
426- "is_gce_cloud_run_service" : is_gcp_cloud_run_service (),
427- "is_gce_cloud_run_job" : is_gcp_cloud_run_job (),
428- "is_github_action" : is_github_action (),
429- }
430-
431- # Run network-calling functions in parallel
432- if platform_detection_timeout_seconds != 0.0 :
433- with ThreadPoolExecutor (max_workers = 6 ) as executor :
434- futures = {
435- "is_ec2_instance" : executor .submit (
436- is_ec2_instance , platform_detection_timeout_seconds
437- ),
438- "has_aws_identity" : executor .submit (
439- has_aws_identity , platform_detection_timeout_seconds
440- ),
441- "is_azure_vm" : executor .submit (
442- is_azure_vm , platform_detection_timeout_seconds , session_manager
443- ),
444- "has_azure_managed_identity" : executor .submit (
445- has_azure_managed_identity ,
446- platform_detection_timeout_seconds ,
447- session_manager ,
448- ),
449- "is_gce_vm" : executor .submit (
450- is_gce_vm , platform_detection_timeout_seconds , session_manager
451- ),
452- "has_gcp_identity" : executor .submit (
453- has_gcp_identity ,
454- platform_detection_timeout_seconds ,
455- session_manager ,
456- ),
457- }
458-
459- platforms .update (
460- {key : future .result () for key , future in futures .items ()}
461- )
462-
463- detected_platforms = []
464- for platform_name , detection_state in platforms .items ():
465- if detection_state == _DetectionState .DETECTED :
466- detected_platforms .append (platform_name )
467- elif detection_state == _DetectionState .TIMEOUT :
468- detected_platforms .append (f"{ platform_name } _timeout" )
469-
470- return detected_platforms
477+ # HTTP timeout should be slightly shorter than thread timeout to allow HTTP-level
478+ # timeouts to occur before thread executor times out. This helps distinguish between
479+ # HTTP_TIMEOUT (network issue) and WORKER_TIMEOUT (thread stuck/hung).
480+ http_timeout_epsilon = 0.05 # 5% shorter
481+ http_timeout = platform_detection_timeout_seconds * (1 - http_timeout_epsilon )
482+ threads_timeout = platform_detection_timeout_seconds
483+
484+ # Suppress noisy logs from underlying HTTP libraries during platform detection
485+ with _suppress_platform_detection_logs ():
486+ # Run environment-only checks synchronously (no network calls, no threading overhead)
487+ platforms = {
488+ "is_aws_lambda" : is_aws_lambda (),
489+ "is_azure_function" : is_azure_function (),
490+ "is_gce_cloud_run_service" : is_gcp_cloud_run_service (),
491+ "is_gce_cloud_run_job" : is_gcp_cloud_run_job (),
492+ "is_github_action" : is_github_action (),
493+ }
494+
495+ # Run network-calling functions in parallel
496+ if platform_detection_timeout_seconds != 0.0 :
497+ with ThreadPoolExecutor (max_workers = 6 ) as executor :
498+ futures = {
499+ "is_ec2_instance" : executor .submit (
500+ is_ec2_instance , http_timeout
501+ ),
502+ "has_aws_identity" : executor .submit (
503+ has_aws_identity , http_timeout
504+ ),
505+ "is_azure_vm" : executor .submit (
506+ is_azure_vm ,
507+ http_timeout ,
508+ session_manager ,
509+ ),
510+ "has_azure_managed_identity" : executor .submit (
511+ has_azure_managed_identity ,
512+ http_timeout ,
513+ session_manager ,
514+ ),
515+ "is_gce_vm" : executor .submit (
516+ is_gce_vm ,
517+ http_timeout ,
518+ session_manager ,
519+ ),
520+ "has_gcp_identity" : executor .submit (
521+ has_gcp_identity ,
522+ http_timeout ,
523+ session_manager ,
524+ ),
525+ }
526+
527+ # Enforce timeout at executor level - all parallel detections must complete
528+ # within threads_timeout
529+ for key , future in futures .items ():
530+ try :
531+ platforms [key ] = future .result (timeout = threads_timeout )
532+ except (FutureTimeoutError , FutureCancelledError ):
533+ # Thread/future timed out at executor level
534+ platforms [key ] = _DetectionState .WORKER_TIMEOUT
535+ except Exception :
536+ # Any other error from the thread
537+ platforms [key ] = _DetectionState .NOT_DETECTED
538+
539+ detected_platforms = []
540+ for platform_name , detection_state in platforms .items ():
541+ if detection_state == _DetectionState .DETECTED :
542+ detected_platforms .append (platform_name )
543+ elif detection_state in (
544+ _DetectionState .HTTP_TIMEOUT ,
545+ _DetectionState .WORKER_TIMEOUT ,
546+ ):
547+ detected_platforms .append (f"{ platform_name } _timeout" )
548+
549+ logger .debug (
550+ "Platform detection completed. Detected platforms: %s" ,
551+ detected_platforms ,
552+ )
553+ return detected_platforms
471554 except Exception :
472555 return []
0 commit comments