From 99ea3eb08c865a930ef9bfb2717523f0193fd56a Mon Sep 17 00:00:00 2001 From: Igor Dvorzhak Date: Mon, 6 Apr 2026 11:07:10 -0700 Subject: [PATCH 1/3] feat: Auto-detect more popular IDEs for usage tracking --- .../dataproc_spark_connect/environment.py | 115 +++++++- tests/unit/test_environment.py | 257 ++++++++++++++++++ 2 files changed, 359 insertions(+), 13 deletions(-) diff --git a/google/cloud/dataproc_spark_connect/environment.py b/google/cloud/dataproc_spark_connect/environment.py index 1f00a033..0428031e 100644 --- a/google/cloud/dataproc_spark_connect/environment.py +++ b/google/cloud/dataproc_spark_connect/environment.py @@ -18,7 +18,7 @@ def is_vscode() -> bool: - """True if running inside VS Code at all.""" + """True if running inside VS Code at all.""" return os.getenv("VSCODE_PID") is not None @@ -38,16 +38,81 @@ def is_colab() -> bool: def is_workbench() -> bool: - """True if running in AI Workbench (managed Jupyter).""" + """True if running in Vertex Workbench Instance (managed Jupyter).""" return os.getenv("VERTEX_PRODUCT") == "WORKBENCH_INSTANCE" +def is_kaggle() -> bool: + """True if running in Kaggle Notebooks.""" + return os.getenv("KAGGLE_KERNEL_RUN_TYPE") is not None + + +def is_databricks() -> bool: + """True if running in Databricks.""" + return os.getenv("DATABRICKS_RUNTIME_VERSION") is not None + + +def is_sagemaker() -> bool: + """True if running in AWS SageMaker.""" + return os.getenv("SAGEMAKER_INTERNAL_IMAGE_URI") is not None + + +def is_deepnote() -> bool: + """True if running in Deepnote.""" + return os.getenv("DEEPNOTE_PROJECT_ID") is not None + + +def is_datalore() -> bool: + """True if running in JetBrains Datalore.""" + return os.getenv("DATALORE_USER") is not None + + +def is_spyder() -> bool: + """True if running inside Spyder IDE.""" + return any(k.startswith("SPYDER") for k in os.environ) + + +def is_cloud_shell() -> bool: + """True if running in Google Cloud Shell.""" + return os.getenv("CLOUD_SHELL") is not None + + +def is_codespaces() -> bool: + """True if running in GitHub Codespaces.""" + return os.getenv("CODESPACES") is not None + + def is_jetbrains_ide() -> bool: - """True if running inside any JetBrains IDE.""" - return "jetbrains" in os.getenv("TERMINAL_EMULATOR", "").lower() + """True if running inside JetBrains IDE.""" + return ( + "jetbrains" in os.getenv("TERMINAL_EMULATOR", "").lower() + or "PYCHARM_HOSTED" in os.environ + ) + + +def is_hex() -> bool: + """True if running in Hex.""" + return os.getenv("HEX_PROJECT_ID") is not None + + +def is_jetski() -> bool: + """True if running in JetSki.""" + return os.getenv("JETSKI_VERSION") is not None + + +def is_polynote() -> bool: + """True if running in Polynote.""" + return os.getenv("POLYNOTE_VERSION") is not None + + +def is_eclipse() -> bool: + """True if running inside Eclipse IDE.""" + return "ECLIPSE_HOME" in os.environ or any( + k.startswith("ECLIPSE") for k in os.environ + ) -def is_interactive(): +def is_interactive() -> bool: try: from IPython import get_ipython @@ -56,14 +121,14 @@ def is_interactive(): except ImportError: pass - return hasattr(sys, "ps1") or sys.flags.interactive + return hasattr(sys, "ps1") or sys.flags.interactive == 1 -def is_terminal(): +def is_terminal() -> bool: return sys.stdin.isatty() -def is_interactive_terminal(): +def is_interactive_terminal() -> bool: return is_interactive() and is_terminal() @@ -78,18 +143,42 @@ def get_client_environment_label() -> str: Priority order: 1. Colab Enterprise ("colab-enterprise") 2. Colab ("colab") - 3. Workbench ("workbench-jupyter") - 4. VS Code ("vscode") - 5. JetBrains IDE ("jetbrains") - 6. Jupyter ("jupyter") - 7. Unknown ("unknown") + 3. Vertex Workbench Instance ("workbench-jupyter") + 4. Kaggle ("kaggle") + 5. AWS SageMaker ("sagemaker") + 6. Databricks ("databricks") + 7. Deepnote ("deepnote") + 8. JetBrains Datalore ("datalore") + 9. GitHub Codespaces ("codespaces") + 10. Google Cloud Shell ("cloud-shell") + 11. Hex ("hex") + 12. JetSki ("jetski") + 13. Polynote ("polynote") + 14. VS Code ("vscode") + 15. JetBrains IDE ("jetbrains") + 16. Spyder ("spyder") + 17. Eclipse ("eclipse") + 18. Jupyter ("jupyter") + 19. Unknown ("unknown") """ checks: List[Tuple[Callable[[], bool], str]] = [ (is_colab_enterprise, "colab-enterprise"), (is_colab, "colab"), (is_workbench, "workbench-jupyter"), + (is_kaggle, "kaggle"), + (is_sagemaker, "sagemaker"), + (is_databricks, "databricks"), + (is_deepnote, "deepnote"), + (is_datalore, "datalore"), + (is_codespaces, "codespaces"), + (is_cloud_shell, "cloud-shell"), + (is_hex, "hex"), + (is_jetski, "jetski"), + (is_polynote, "polynote"), (is_vscode, "vscode"), (is_jetbrains_ide, "jetbrains"), + (is_spyder, "spyder"), + (is_eclipse, "eclipse"), (is_jupyter, "jupyter"), ] for detector, label in checks: diff --git a/tests/unit/test_environment.py b/tests/unit/test_environment.py index 836fbd38..86b4d4de 100644 --- a/tests/unit/test_environment.py +++ b/tests/unit/test_environment.py @@ -70,16 +70,123 @@ def test_is_workbench_false(self): os.environ["VERTEX_PRODUCT"] = "OTHER" self.assertFalse(environment.is_workbench()) + def test_is_kaggle_true(self): + os.environ["KAGGLE_KERNEL_RUN_TYPE"] = "Interactive" + self.assertTrue(environment.is_kaggle()) + + def test_is_kaggle_false(self): + os.environ.pop("KAGGLE_KERNEL_RUN_TYPE", None) + self.assertFalse(environment.is_kaggle()) + + def test_is_databricks_true(self): + os.environ["DATABRICKS_RUNTIME_VERSION"] = "10.4.x-scala2.12" + self.assertTrue(environment.is_databricks()) + + def test_is_databricks_false(self): + os.environ.pop("DATABRICKS_RUNTIME_VERSION", None) + self.assertFalse(environment.is_databricks()) + + def test_is_sagemaker_true(self): + os.environ["SAGEMAKER_INTERNAL_IMAGE_URI"] = "image" + self.assertTrue(environment.is_sagemaker()) + + def test_is_sagemaker_false(self): + os.environ.pop("SAGEMAKER_INTERNAL_IMAGE_URI", None) + self.assertFalse(environment.is_sagemaker()) + + def test_is_deepnote_true(self): + os.environ["DEEPNOTE_PROJECT_ID"] = "project-123" + self.assertTrue(environment.is_deepnote()) + + def test_is_deepnote_false(self): + os.environ.pop("DEEPNOTE_PROJECT_ID", None) + self.assertFalse(environment.is_deepnote()) + + def test_is_datalore_true(self): + os.environ["DATALORE_USER"] = "user-123" + self.assertTrue(environment.is_datalore()) + + def test_is_datalore_false(self): + os.environ.pop("DATALORE_USER", None) + self.assertFalse(environment.is_datalore()) + + def test_is_spyder_true(self): + os.environ["SPYDER_ARGS"] = "[]" + self.assertTrue(environment.is_spyder()) + + def test_is_spyder_false(self): + for k in list(os.environ.keys()): + if k.startswith("SPYDER"): + os.environ.pop(k) + self.assertFalse(environment.is_spyder()) + + def test_is_cloud_shell_true(self): + os.environ["CLOUD_SHELL"] = "true" + self.assertTrue(environment.is_cloud_shell()) + + def test_is_cloud_shell_false(self): + os.environ.pop("CLOUD_SHELL", None) + self.assertFalse(environment.is_cloud_shell()) + + def test_is_codespaces_true(self): + os.environ["CODESPACES"] = "true" + self.assertTrue(environment.is_codespaces()) + + def test_is_codespaces_false(self): + os.environ.pop("CODESPACES", None) + self.assertFalse(environment.is_codespaces()) + + def test_is_hex_true(self): + os.environ["HEX_PROJECT_ID"] = "hex-123" + self.assertTrue(environment.is_hex()) + + def test_is_hex_false(self): + os.environ.pop("HEX_PROJECT_ID", None) + self.assertFalse(environment.is_hex()) + + def test_is_jetski_true(self): + os.environ["JETSKI_VERSION"] = "1.0" + self.assertTrue(environment.is_jetski()) + + def test_is_jetski_false(self): + os.environ.pop("JETSKI_VERSION", None) + self.assertFalse(environment.is_jetski()) + + def test_is_polynote_true(self): + os.environ["POLYNOTE_VERSION"] = "1.0" + self.assertTrue(environment.is_polynote()) + + def test_is_polynote_false(self): + os.environ.pop("POLYNOTE_VERSION", None) + self.assertFalse(environment.is_polynote()) + + def test_is_eclipse_true(self): + os.environ["ECLIPSE_HOME"] = "/path/to/eclipse" + self.assertTrue(environment.is_eclipse()) + + def test_is_eclipse_false(self): + for k in list(os.environ.keys()): + if k.startswith("ECLIPSE"): + os.environ.pop(k) + self.assertFalse(environment.is_eclipse()) + def test_is_jetbrains_ide_true(self): os.environ["TERMINAL_EMULATOR"] = "JetBrains term" self.assertTrue(environment.is_jetbrains_ide()) + def test_is_jetbrains_ide_true_pycharm(self): + os.environ.pop("TERMINAL_EMULATOR", None) + os.environ["PYCHARM_HOSTED"] = "1" + self.assertTrue(environment.is_jetbrains_ide()) + def test_is_jetbrains_ide_false_env_var_not_set(self): os.environ.pop("TERMINAL_EMULATOR", None) + os.environ.pop("PYCHARM_HOSTED", None) self.assertFalse(environment.is_jetbrains_ide()) def test_is_jetbrains_ide_false_env_var_not_jetbrains(self): os.environ["TERMINAL_EMULATOR"] = "real term" + os.environ.pop("PYCHARM_HOSTED", None) self.assertFalse(environment.is_jetbrains_ide()) # ---- get_client_environment_label tests ---- @@ -96,6 +203,46 @@ def test_is_jetbrains_ide_false_env_var_not_jetbrains(self): "google.cloud.dataproc_spark_connect.environment.is_workbench", return_value=False, ) + @mock.patch( + "google.cloud.dataproc_spark_connect.environment.is_kaggle", + return_value=False, + ) + @mock.patch( + "google.cloud.dataproc_spark_connect.environment.is_sagemaker", + return_value=False, + ) + @mock.patch( + "google.cloud.dataproc_spark_connect.environment.is_databricks", + return_value=False, + ) + @mock.patch( + "google.cloud.dataproc_spark_connect.environment.is_deepnote", + return_value=False, + ) + @mock.patch( + "google.cloud.dataproc_spark_connect.environment.is_datalore", + return_value=False, + ) + @mock.patch( + "google.cloud.dataproc_spark_connect.environment.is_codespaces", + return_value=False, + ) + @mock.patch( + "google.cloud.dataproc_spark_connect.environment.is_cloud_shell", + return_value=False, + ) + @mock.patch( + "google.cloud.dataproc_spark_connect.environment.is_hex", + return_value=False, + ) + @mock.patch( + "google.cloud.dataproc_spark_connect.environment.is_jetski", + return_value=False, + ) + @mock.patch( + "google.cloud.dataproc_spark_connect.environment.is_polynote", + return_value=False, + ) @mock.patch( "google.cloud.dataproc_spark_connect.environment.is_vscode", return_value=False, @@ -104,6 +251,14 @@ def test_is_jetbrains_ide_false_env_var_not_jetbrains(self): "google.cloud.dataproc_spark_connect.environment.is_jetbrains_ide", return_value=False, ) + @mock.patch( + "google.cloud.dataproc_spark_connect.environment.is_spyder", + return_value=False, + ) + @mock.patch( + "google.cloud.dataproc_spark_connect.environment.is_eclipse", + return_value=False, + ) @mock.patch( "google.cloud.dataproc_spark_connect.environment.is_jupyter", return_value=False, @@ -170,6 +325,68 @@ def test_get_client_environment_label_workbench(self, *mocks): "google.cloud.dataproc_spark_connect.environment.is_workbench", return_value=False, ) + @mock.patch( + "google.cloud.dataproc_spark_connect.environment.is_kaggle", + return_value=True, + ) + def test_get_client_environment_label_kaggle(self, *mocks): + self.assertEqual( + environment.get_client_environment_label(), + "kaggle", + ) + + @mock.patch( + "google.cloud.dataproc_spark_connect.environment.is_colab_enterprise", + return_value=False, + ) + @mock.patch( + "google.cloud.dataproc_spark_connect.environment.is_colab", + return_value=False, + ) + @mock.patch( + "google.cloud.dataproc_spark_connect.environment.is_workbench", + return_value=False, + ) + @mock.patch( + "google.cloud.dataproc_spark_connect.environment.is_kaggle", + return_value=False, + ) + @mock.patch( + "google.cloud.dataproc_spark_connect.environment.is_sagemaker", + return_value=False, + ) + @mock.patch( + "google.cloud.dataproc_spark_connect.environment.is_databricks", + return_value=False, + ) + @mock.patch( + "google.cloud.dataproc_spark_connect.environment.is_deepnote", + return_value=False, + ) + @mock.patch( + "google.cloud.dataproc_spark_connect.environment.is_datalore", + return_value=False, + ) + @mock.patch( + "google.cloud.dataproc_spark_connect.environment.is_codespaces", + return_value=False, + ) + @mock.patch( + "google.cloud.dataproc_spark_connect.environment.is_cloud_shell", + return_value=False, + ) + @mock.patch( + "google.cloud.dataproc_spark_connect.environment.is_hex", + return_value=False, + ) + @mock.patch( + "google.cloud.dataproc_spark_connect.environment.is_jetski", + return_value=False, + ) + @mock.patch( + "google.cloud.dataproc_spark_connect.environment.is_polynote", + return_value=False, + ) @mock.patch( "google.cloud.dataproc_spark_connect.environment.is_vscode", return_value=True, @@ -192,6 +409,46 @@ def test_get_client_environment_label_vscode(self, *mocks): "google.cloud.dataproc_spark_connect.environment.is_workbench", return_value=False, ) + @mock.patch( + "google.cloud.dataproc_spark_connect.environment.is_kaggle", + return_value=False, + ) + @mock.patch( + "google.cloud.dataproc_spark_connect.environment.is_sagemaker", + return_value=False, + ) + @mock.patch( + "google.cloud.dataproc_spark_connect.environment.is_databricks", + return_value=False, + ) + @mock.patch( + "google.cloud.dataproc_spark_connect.environment.is_deepnote", + return_value=False, + ) + @mock.patch( + "google.cloud.dataproc_spark_connect.environment.is_datalore", + return_value=False, + ) + @mock.patch( + "google.cloud.dataproc_spark_connect.environment.is_codespaces", + return_value=False, + ) + @mock.patch( + "google.cloud.dataproc_spark_connect.environment.is_cloud_shell", + return_value=False, + ) + @mock.patch( + "google.cloud.dataproc_spark_connect.environment.is_hex", + return_value=False, + ) + @mock.patch( + "google.cloud.dataproc_spark_connect.environment.is_jetski", + return_value=False, + ) + @mock.patch( + "google.cloud.dataproc_spark_connect.environment.is_polynote", + return_value=False, + ) @mock.patch( "google.cloud.dataproc_spark_connect.environment.is_vscode", return_value=False, From 7e247af187a8f66d4f578ab1a2640c1eaaac5e93 Mon Sep 17 00:00:00 2001 From: Igor Dvorzhak Date: Mon, 6 Apr 2026 12:48:16 -0700 Subject: [PATCH 2/3] Apply suggestion from @gemini-code-assist[bot] Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> --- google/cloud/dataproc_spark_connect/environment.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/google/cloud/dataproc_spark_connect/environment.py b/google/cloud/dataproc_spark_connect/environment.py index 0428031e..2411f290 100644 --- a/google/cloud/dataproc_spark_connect/environment.py +++ b/google/cloud/dataproc_spark_connect/environment.py @@ -121,7 +121,7 @@ def is_interactive() -> bool: except ImportError: pass - return hasattr(sys, "ps1") or sys.flags.interactive == 1 + return hasattr(sys, "ps1") or bool(sys.flags.interactive) def is_terminal() -> bool: From fad28cfbd00d705d57cbf14e998687be06d1032a Mon Sep 17 00:00:00 2001 From: Igor Dvorzhak Date: Mon, 6 Apr 2026 13:03:11 -0700 Subject: [PATCH 3/3] Remove irrelevant IDEs --- .../dataproc_spark_connect/environment.py | 21 +++++++------------ tests/unit/test_environment.py | 20 ------------------ 2 files changed, 7 insertions(+), 34 deletions(-) diff --git a/google/cloud/dataproc_spark_connect/environment.py b/google/cloud/dataproc_spark_connect/environment.py index 2411f290..c74e81eb 100644 --- a/google/cloud/dataproc_spark_connect/environment.py +++ b/google/cloud/dataproc_spark_connect/environment.py @@ -95,11 +95,6 @@ def is_hex() -> bool: return os.getenv("HEX_PROJECT_ID") is not None -def is_jetski() -> bool: - """True if running in JetSki.""" - return os.getenv("JETSKI_VERSION") is not None - - def is_polynote() -> bool: """True if running in Polynote.""" return os.getenv("POLYNOTE_VERSION") is not None @@ -152,14 +147,13 @@ def get_client_environment_label() -> str: 9. GitHub Codespaces ("codespaces") 10. Google Cloud Shell ("cloud-shell") 11. Hex ("hex") - 12. JetSki ("jetski") - 13. Polynote ("polynote") - 14. VS Code ("vscode") - 15. JetBrains IDE ("jetbrains") - 16. Spyder ("spyder") - 17. Eclipse ("eclipse") - 18. Jupyter ("jupyter") - 19. Unknown ("unknown") + 12. Polynote ("polynote") + 13. VS Code ("vscode") + 14. JetBrains IDE ("jetbrains") + 15. Spyder ("spyder") + 16. Eclipse ("eclipse") + 17. Jupyter ("jupyter") + 18. Unknown ("unknown") """ checks: List[Tuple[Callable[[], bool], str]] = [ (is_colab_enterprise, "colab-enterprise"), @@ -173,7 +167,6 @@ def get_client_environment_label() -> str: (is_codespaces, "codespaces"), (is_cloud_shell, "cloud-shell"), (is_hex, "hex"), - (is_jetski, "jetski"), (is_polynote, "polynote"), (is_vscode, "vscode"), (is_jetbrains_ide, "jetbrains"), diff --git a/tests/unit/test_environment.py b/tests/unit/test_environment.py index 86b4d4de..d3bd5f0f 100644 --- a/tests/unit/test_environment.py +++ b/tests/unit/test_environment.py @@ -144,14 +144,6 @@ def test_is_hex_false(self): os.environ.pop("HEX_PROJECT_ID", None) self.assertFalse(environment.is_hex()) - def test_is_jetski_true(self): - os.environ["JETSKI_VERSION"] = "1.0" - self.assertTrue(environment.is_jetski()) - - def test_is_jetski_false(self): - os.environ.pop("JETSKI_VERSION", None) - self.assertFalse(environment.is_jetski()) - def test_is_polynote_true(self): os.environ["POLYNOTE_VERSION"] = "1.0" self.assertTrue(environment.is_polynote()) @@ -235,10 +227,6 @@ def test_is_jetbrains_ide_false_env_var_not_jetbrains(self): "google.cloud.dataproc_spark_connect.environment.is_hex", return_value=False, ) - @mock.patch( - "google.cloud.dataproc_spark_connect.environment.is_jetski", - return_value=False, - ) @mock.patch( "google.cloud.dataproc_spark_connect.environment.is_polynote", return_value=False, @@ -379,10 +367,6 @@ def test_get_client_environment_label_kaggle(self, *mocks): "google.cloud.dataproc_spark_connect.environment.is_hex", return_value=False, ) - @mock.patch( - "google.cloud.dataproc_spark_connect.environment.is_jetski", - return_value=False, - ) @mock.patch( "google.cloud.dataproc_spark_connect.environment.is_polynote", return_value=False, @@ -441,10 +425,6 @@ def test_get_client_environment_label_vscode(self, *mocks): "google.cloud.dataproc_spark_connect.environment.is_hex", return_value=False, ) - @mock.patch( - "google.cloud.dataproc_spark_connect.environment.is_jetski", - return_value=False, - ) @mock.patch( "google.cloud.dataproc_spark_connect.environment.is_polynote", return_value=False,