From 71a878c0ed44e2e4ddc783573b18a15b6ceb422c Mon Sep 17 00:00:00 2001 From: JosepSampe Date: Sat, 19 Jul 2025 15:22:34 +0200 Subject: [PATCH 1/3] [GCP Functions] Retry function creation on failure to improve reliability --- lithops/job/job.py | 2 -- .../serverless/backends/gcp_functions/config.py | 10 +++------- .../backends/gcp_functions/gcp_functions.py | 17 +++++++++++++---- lithops/standalone/worker.py | 3 --- lithops/storage/storage.py | 3 --- 5 files changed, 16 insertions(+), 19 deletions(-) diff --git a/lithops/job/job.py b/lithops/job/job.py index dbd6ab2cf..eeb6f44b4 100644 --- a/lithops/job/job.py +++ b/lithops/job/job.py @@ -177,8 +177,6 @@ def _create_job( """ Creates a new Job """ - global FUNCTION_CACHE - ext_env = {} if extra_env is None else extra_env.copy() if ext_env: ext_env = utils.convert_bools_to_string(ext_env) diff --git a/lithops/serverless/backends/gcp_functions/config.py b/lithops/serverless/backends/gcp_functions/config.py index aca4bcb89..0e953b84c 100644 --- a/lithops/serverless/backends/gcp_functions/config.py +++ b/lithops/serverless/backends/gcp_functions/config.py @@ -29,9 +29,6 @@ RUNTIME_MEMORY_MAX = 8192 # 8GB RUNTIME_MEMORY_OPTIONS = {128, 256, 512, 1024, 2048, 4096, 8192} -RETRIES = 5 -RETRY_SLEEP = 20 - AVAILABLE_PY_RUNTIMES = { '3.7': 'python37', '3.8': 'python38', @@ -49,7 +46,9 @@ 'max_workers': 1000, 'worker_processes': 1, 'invoke_pool_threads': 1000, - 'trigger': 'pub/sub' + 'trigger': 'pub/sub', + 'retries': 5, + 'retry_sleep': 10, } REQUIREMENTS_FILE = """ @@ -103,8 +102,5 @@ def load_config(config_data=None): if config_data['gcp_functions']['runtime_memory'] > RUNTIME_MEMORY_MAX: config_data['gcp_functions']['runtime_memory'] = RUNTIME_MEMORY_MAX - config_data['gcp_functions']['retries'] = RETRIES - config_data['gcp_functions']['retry_sleep'] = RETRY_SLEEP - if 'region' not in config_data['gcp']: config_data['gcp']['region'] = config_data['gcp_functions']['region'] diff --git 
a/lithops/serverless/backends/gcp_functions/gcp_functions.py b/lithops/serverless/backends/gcp_functions/gcp_functions.py index 67efa7572..c0f647728 100644 --- a/lithops/serverless/backends/gcp_functions/gcp_functions.py +++ b/lithops/serverless/backends/gcp_functions/gcp_functions.py @@ -244,10 +244,19 @@ def _create_function(self, runtime_name, memory, timeout=60): 'failurePolicy': {} } - operation = self._api_resource.projects().locations().functions().create( - location=self._default_location, - body=cloud_function - ).execute(num_retries=self.num_retries) + logger.info(f'Deploying function {function_location}') + for attempt in range(self.num_retries): + try: + operation = self._api_resource.projects().locations().functions().create( + location=self._default_location, + body=cloud_function + ).execute() + break + except Exception as e: + if attempt < self.num_retries - 1: + time.sleep(self.retry_sleep) + else: + raise Exception(f"Failed to create Cloud Function after {self.num_retries} attempts.") from e # Wait until the function is completely deployed logger.info('Waiting for the function to be deployed') diff --git a/lithops/standalone/worker.py b/lithops/standalone/worker.py index c7738fd27..03cb77faa 100644 --- a/lithops/standalone/worker.py +++ b/lithops/standalone/worker.py @@ -146,8 +146,6 @@ def notify_task_done(job_key, call_id): def redis_queue_consumer(pid, work_queue_name, exec_mode, backend): - global worker_threads - worker_threads[pid]['status'] = WorkerStatus.IDLE.value logger.info(f"Redis consumer process {pid} started") @@ -213,7 +211,6 @@ def redis_queue_consumer(pid, work_queue_name, exec_mode, backend): def run_worker(): global redis_client global budget_keeper - global worker_threads os.makedirs(LITHOPS_TEMP_DIR, exist_ok=True) diff --git a/lithops/storage/storage.py b/lithops/storage/storage.py index 22a88604e..9852a261d 100644 --- a/lithops/storage/storage.py +++ b/lithops/storage/storage.py @@ -462,9 +462,6 @@ def 
get_runtime_meta(self, key): :param runtime: name of the runtime :return: runtime metadata """ - - global RUNTIME_META_CACHE - path = [RUNTIMES_PREFIX, key + ".meta.json"] filename_local_path = os.path.join(CACHE_DIR, *path) From 0aab294b5696e18fe711d646cf7ebd7a009f41ef Mon Sep 17 00:00:00 2001 From: JosepSampe Date: Sat, 19 Jul 2025 15:24:43 +0200 Subject: [PATCH 2/3] [GCP Functions] Retry function creation on failure to improve reliability --- lithops/serverless/backends/gcp_functions/config.py | 2 +- lithops/serverless/backends/k8s/entry_point.py | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/lithops/serverless/backends/gcp_functions/config.py b/lithops/serverless/backends/gcp_functions/config.py index 0e953b84c..fc32bf7fd 100644 --- a/lithops/serverless/backends/gcp_functions/config.py +++ b/lithops/serverless/backends/gcp_functions/config.py @@ -48,7 +48,7 @@ 'invoke_pool_threads': 1000, 'trigger': 'pub/sub', 'retries': 5, - 'retry_sleep': 10, + 'retry_sleep': 10 } REQUIREMENTS_FILE = """ diff --git a/lithops/serverless/backends/k8s/entry_point.py b/lithops/serverless/backends/k8s/entry_point.py index e3b576f05..06eb0d45e 100644 --- a/lithops/serverless/backends/k8s/entry_point.py +++ b/lithops/serverless/backends/k8s/entry_point.py @@ -44,8 +44,6 @@ @proxy.route('/get-range/<jobkey>/<total_calls>/<chunksize>', methods=['GET']) def get_range(jobkey, total_calls, chunksize): - global JOB_INDEXES - range_start = 0 if jobkey not in JOB_INDEXES else JOB_INDEXES[jobkey] range_end = min(range_start + int(chunksize), int(total_calls)) JOB_INDEXES[jobkey] = range_end From e3c23f2c905298d463dd60b4c1658f8e5804dd6f Mon Sep 17 00:00:00 2001 From: JosepSampe Date: Sat, 19 Jul 2025 15:26:56 +0200 Subject: [PATCH 3/3] [GCP Functions] Retry function creation on failure to improve reliability --- lithops/serverless/backends/gcp_functions/config.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lithops/serverless/backends/gcp_functions/config.py
b/lithops/serverless/backends/gcp_functions/config.py index fc32bf7fd..4963ee7c0 100644 --- a/lithops/serverless/backends/gcp_functions/config.py +++ b/lithops/serverless/backends/gcp_functions/config.py @@ -35,7 +35,8 @@ '3.9': 'python39', '3.10': 'python310', '3.11': 'python311', - '3.12': 'python312' + '3.12': 'python312', + '3.13': 'python313' } USER_RUNTIMES_PREFIX = 'lithops.user_runtimes'