Add more logging for cloud run debugging

BoqianShi · BoqianShi · commit 305ed1fb497f · 2025-11-06T19:26:09.000-08:00
diff --git a/spark-bigquery-connector-common/src/test/java/com/google/cloud/spark/bigquery/integration/CatalogIntegrationTestBase.java b/spark-bigquery-connector-common/src/test/java/com/google/cloud/spark/bigquery/integration/CatalogIntegrationTestBase.java
@@ -15,16 +15,9 @@
  */
 package com.google.cloud.spark.bigquery.integration;
 
-import static com.google.common.truth.Truth.assertThat;
-
-import com.google.cloud.bigquery.BigQuery;
-import com.google.cloud.bigquery.Dataset;
-import com.google.cloud.bigquery.DatasetId;
-import com.google.cloud.bigquery.QueryJobConfiguration;
-import com.google.cloud.bigquery.Table;
-import com.google.cloud.bigquery.TableId;
 import java.util.List;
 import java.util.stream.Collectors;
+
 import org.apache.spark.sql.Row;
 import org.apache.spark.sql.RowFactory;
 import org.apache.spark.sql.SparkSession;
@@ -36,6 +29,14 @@
 import org.junit.Ignore;
 import org.junit.Test;
 
+import com.google.cloud.bigquery.BigQuery;
+import com.google.cloud.bigquery.Dataset;
+import com.google.cloud.bigquery.DatasetId;
+import com.google.cloud.bigquery.QueryJobConfiguration;
+import com.google.cloud.bigquery.Table;
+import com.google.cloud.bigquery.TableId;
+import static com.google.common.truth.Truth.assertThat;
+
 public class CatalogIntegrationTestBase {
 
   public static final String DEFAULT_NAMESPACE = "default";
@@ -45,7 +46,7 @@ public class CatalogIntegrationTestBase {
 
   protected static SparkSession spark;
   private String testTable;
-  // 2. Initialize the SparkSession ONCE before all tests
+
   @BeforeClass
   public static void setupSparkSession() {
     spark =
@@ -60,8 +61,6 @@ public static void setupSparkSession() {
             .getOrCreate();
   }
 
-  // 4. Stop the SparkSession ONCE after all tests are done
-  // This fixes the local IllegalStateException (race condition)
   @AfterClass
   public static void teardownSparkSession() {
     if (spark != null) {
@@ -253,66 +252,130 @@ public void testDropDatabase() {
 
   @Test
   public void testCatalogInitializationWithProject() {
-    spark
-        .conf()
-        .set("spark.sql.catalog.public_catalog", "com.google.cloud.spark.bigquery.BigQueryCatalog");
-    spark.conf().set("spark.sql.catalog.public_catalog.project", "bigquery-public-data");
-
-    List<Row> rows = spark.sql("SHOW DATABASES IN public_catalog").collectAsList();
-    List<String> databaseNames =
-        rows.stream().map(row -> row.getString(0)).collect(Collectors.toList());
-    assertThat(databaseNames).contains("samples");
-
-    List<Row> data =
-        spark.sql("SELECT * FROM public_catalog.samples.shakespeare LIMIT 10").collectAsList();
-    assertThat(data).hasSize(10);
+    try {
+      spark
+          .conf()
+          .set(
+              "spark.sql.catalog.public_catalog",
+              "com.google.cloud.spark.bigquery.BigQueryCatalog");
+      // Use 'projectId' instead of 'project' - this is the correct property name
+      spark.conf().set("spark.sql.catalog.public_catalog.projectId", "bigquery-public-data");
+
+      // Add a small delay to ensure catalog is fully initialized
+      Thread.sleep(2000);
+
+      // Intentionally no "USE public_catalog" probe: the static SparkSession is shared by
+      // all tests in this class, and a successful USE switches the session's current
+      // catalog, which the cleanup below never restores. A probe that swallows its own
+      // failure verifies nothing anyway; the catalog-qualified statements that follow
+      // load the catalog themselves and fail loudly if it cannot be initialized or
+      // reached.
+
+      List<Row> rows = spark.sql("SHOW DATABASES IN public_catalog").collectAsList();
+      List<String> databaseNames =
+          rows.stream().map(row -> row.getString(0)).collect(Collectors.toList());
+      assertThat(databaseNames).contains("samples");
+
+      List<Row> data =
+          spark.sql("SELECT * FROM public_catalog.samples.shakespeare LIMIT 10").collectAsList();
+      assertThat(data).hasSize(10);
+    } catch (Exception e) {
+      // Log the full stack trace to help debug cloud build failures
+      e.printStackTrace();
+      throw new RuntimeException("Test failed with detailed error", e);
+    } finally {
+      // Clean up catalog configuration to avoid interference with other tests
+      try {
+        spark.conf().unset("spark.sql.catalog.public_catalog");
+        spark.conf().unset("spark.sql.catalog.public_catalog.projectId");
+      } catch (Exception ignored) {
+      }
+    }
   }
 
   @Test
   public void testCreateCatalogWithLocation() throws Exception {
     String database = String.format("create_db_with_location_%s", System.nanoTime());
     DatasetId datasetId = DatasetId.of(database);
-    spark
-        .conf()
-        .set(
-            "spark.sql.catalog.test_location_catalog",
-            "com.google.cloud.spark.bigquery.BigQueryCatalog");
-    spark.conf().set("spark.sql.catalog.test_location_catalog.bigquery_location", "EU");
-    spark.sql("CREATE DATABASE test_location_catalog." + database);
-    Dataset dataset = bigquery.getDataset(datasetId);
-    assertThat(dataset).isNotNull();
-    assertThat(dataset.getLocation()).isEqualTo("EU");
-    bigquery.delete(datasetId, BigQuery.DatasetDeleteOption.deleteContents());
+    try {
+      spark
+          .conf()
+          .set(
+              "spark.sql.catalog.test_location_catalog",
+              "com.google.cloud.spark.bigquery.BigQueryCatalog");
+      spark.conf().set("spark.sql.catalog.test_location_catalog.bigquery_location", "EU");
+
+      // Add delay for catalog initialization
+      Thread.sleep(2000);
+
+      spark.sql("CREATE DATABASE test_location_catalog." + database);
+      Dataset dataset = bigquery.getDataset(datasetId);
+      assertThat(dataset).isNotNull();
+      assertThat(dataset.getLocation()).isEqualTo("EU");
+    } finally {
+      // Reset the shared session's catalog config first so it happens even if delete throws
+      try {
+        spark.conf().unset("spark.sql.catalog.test_location_catalog");
+        spark.conf().unset("spark.sql.catalog.test_location_catalog.bigquery_location");
+      } catch (Exception ignored) {
+      }
+      bigquery.delete(datasetId, BigQuery.DatasetDeleteOption.deleteContents());
+    }
   }
 
   @Test
   public void testCreateTableAsSelectWithProjectAndLocation() {
     String database = String.format("ctas_db_with_location_%s", System.nanoTime());
     String newTable = "ctas_table_from_public";
     DatasetId datasetId = DatasetId.of(database);
-    spark
-        .conf()
-        .set("spark.sql.catalog.public_catalog", "com.google.cloud.spark.bigquery.BigQueryCatalog");
-    spark.conf().set("spark.sql.catalog.public_catalog.projectId", "bigquery-public-data");
-    spark
-        .conf()
-        .set(
-            "spark.sql.catalog.test_catalog_as_select",
-            "com.google.cloud.spark.bigquery.BigQueryCatalog");
-    spark.conf().set("spark.sql.catalog.test_catalog_as_select.bigquery_location", "EU");
-    spark.sql("CREATE DATABASE test_catalog_as_select." + database);
-    spark.sql(
-        "CREATE TABLE test_catalog_as_select."
-            + database
-            + "."
-            + newTable
-            + " AS SELECT * FROM public_catalog.samples.shakespeare LIMIT 10");
-    Dataset dataset = bigquery.getDataset(datasetId);
-    assertThat(dataset).isNotNull();
-    assertThat(dataset.getLocation()).isEqualTo("EU");
-    Table table = bigquery.getTable(TableId.of(datasetId.getDataset(), newTable));
-    assertThat(table).isNotNull();
-    bigquery.delete(datasetId, BigQuery.DatasetDeleteOption.deleteContents());
+    try {
+      spark
+          .conf()
+          .set(
+              "spark.sql.catalog.public_catalog",
+              "com.google.cloud.spark.bigquery.BigQueryCatalog");
+      // Use 'projectId' instead of 'project'
+      spark.conf().set("spark.sql.catalog.public_catalog.projectId", "bigquery-public-data");
+      spark
+          .conf()
+          .set(
+              "spark.sql.catalog.test_catalog_as_select",
+              "com.google.cloud.spark.bigquery.BigQueryCatalog");
+      spark.conf().set("spark.sql.catalog.test_catalog_as_select.bigquery_location", "EU");
+
+      // Add delay for catalog initialization
+      Thread.sleep(2000);
+
+      spark.sql("CREATE DATABASE test_catalog_as_select." + database);
+
+      // Add another small delay after database creation
+      Thread.sleep(1000);
+
+      spark.sql(
+          "CREATE TABLE test_catalog_as_select."
+              + database
+              + "."
+              + newTable
+              + " AS SELECT * FROM public_catalog.samples.shakespeare LIMIT 10");
+      Dataset dataset = bigquery.getDataset(datasetId);
+      assertThat(dataset).isNotNull();
+      assertThat(dataset.getLocation()).isEqualTo("EU");
+      Table table = bigquery.getTable(TableId.of(datasetId.getDataset(), newTable));
+      assertThat(table).isNotNull();
+    } catch (Exception e) {
+      e.printStackTrace();
+      throw new RuntimeException("Test failed with detailed error", e);
+    } finally {
+      // Reset the shared session's catalog config first so it happens even if delete throws
+      try {
+        spark.conf().unset("spark.sql.catalog.public_catalog");
+        spark.conf().unset("spark.sql.catalog.public_catalog.projectId");
+        spark.conf().unset("spark.sql.catalog.test_catalog_as_select");
+        spark.conf().unset("spark.sql.catalog.test_catalog_as_select.bigquery_location");
+      } catch (Exception ignored) {
+      }
+      bigquery.delete(datasetId, BigQuery.DatasetDeleteOption.deleteContents());
+    }
   }
 
   private static SparkSession createSparkSession() {
diff --git a/spark-bigquery-dsv2/spark-3.5-bigquery-lib/src/main/java/com/google/cloud/spark/bigquery/BigQueryCatalog.java b/spark-bigquery-dsv2/spark-3.5-bigquery-lib/src/main/java/com/google/cloud/spark/bigquery/BigQueryCatalog.java
@@ -81,23 +81,32 @@ public class BigQueryCatalog implements TableCatalog, SupportsNamespaces {
 
   @Override
   public void initialize(String name, CaseInsensitiveStringMap caseInsensitiveStringMap) {
-    logger.info("Initializing BigQuery table catalog [{}])", name);
-    Injector injector =
-        new InjectorBuilder()
-            .withOptions(caseInsensitiveStringMap.asCaseSensitiveMap())
-            .withTableIsMandatory(false)
-            .build();
-    tableProvider =
-        StreamSupport.stream(ServiceLoader.load(DataSourceRegister.class).spliterator(), false)
-            .filter(candidate -> candidate.shortName().equals("bigquery"))
-            .map(candidate -> (TableProvider) candidate)
-            .findFirst()
-            .orElseThrow(
-                () -> new IllegalStateException("Could not find a BigQuery TableProvider"));
-    bigQueryClient = injector.getInstance(BigQueryClient.class);
-    schemaConverters =
-        SchemaConverters.from(
-            SchemaConvertersConfiguration.from(injector.getInstance(SparkBigQueryConfig.class)));
+    logger.info(
+        "Initializing BigQuery table catalog [{}] with option keys: {}",
+        name,
+        caseInsensitiveStringMap.keySet()); // keys only: values may hold credentials
+    try {
+      Injector injector =
+          new InjectorBuilder()
+              .withOptions(caseInsensitiveStringMap.asCaseSensitiveMap())
+              .withTableIsMandatory(false)
+              .build();
+      tableProvider =
+          StreamSupport.stream(ServiceLoader.load(DataSourceRegister.class).spliterator(), false)
+              .filter(candidate -> candidate.shortName().equals("bigquery"))
+              .map(candidate -> (TableProvider) candidate)
+              .findFirst()
+              .orElseThrow(
+                  () -> new IllegalStateException("Could not find a BigQuery TableProvider"));
+      bigQueryClient = injector.getInstance(BigQueryClient.class);
+      schemaConverters =
+          SchemaConverters.from(
+              SchemaConvertersConfiguration.from(injector.getInstance(SparkBigQueryConfig.class)));
+      logger.info("BigQuery table catalog [{}] initialized successfully", name);
+    } catch (Exception e) {
+      logger.error("Failed to initialize BigQuery catalog [{}]", name, e);
+      throw new BigQueryConnectorException("Failed to initialize BigQuery catalog: " + name, e);
+    }
   }
 
   @Override
@@ -289,10 +298,18 @@ static TableId toTableId(Identifier identifier) {
 
   @Override
   public String[][] listNamespaces() throws NoSuchNamespaceException {
-    return Streams.stream(bigQueryClient.listDatasets())
-        .map(Dataset::getDatasetId)
-        .map(this::toNamespace)
-        .toArray(String[][]::new);
+    if (bigQueryClient == null) {
+      throw new IllegalStateException("BigQuery catalog not properly initialized");
+    }
+    try {
+      // Map every dataset returned by the client to its namespace representation.
+      return Streams.stream(bigQueryClient.listDatasets())
+          .map(dataset -> toNamespace(dataset.getDatasetId()))
+          .toArray(String[][]::new);
+    } catch (Exception failure) {
+      logger.error("Error listing namespaces", failure);
+      throw new BigQueryConnectorException("Failed to list namespaces", failure);
+    }
   }
 
   private String[] toNamespace(DatasetId datasetId) {