brain-tec · bt-admin · Jan 9, 2026 · Nov 22, 2025 · Jun 2, 2025 · Nov 21, 2025
diff --git a/README.md b/README.md
@@ -21,8 +21,8 @@ Available addons
 ----------------
 addon | version | maintainers | summary
 --- | --- | --- | ---
-[queue_job](queue_job/) | 19.0.1.0.0 | <a href='https://github.com/guewen'><img src='https://github.com/guewen.png' width='32' height='32' style='border-radius:50%;' alt='guewen'/></a> | Job Queue
-[test_queue_job](test_queue_job/) | 19.0.1.0.0 |  | Queue Job Tests
+[queue_job](queue_job/) | 19.0.1.0.1 | <a href='https://github.com/guewen'><img src='https://github.com/guewen.png' width='32' height='32' style='border-radius:50%;' alt='guewen'/></a> <a href='https://github.com/sbidoul'><img src='https://github.com/sbidoul.png' width='32' height='32' style='border-radius:50%;' alt='sbidoul'/></a> | Job Queue
+[test_queue_job](test_queue_job/) | 19.0.1.0.1 | <a href='https://github.com/sbidoul'><img src='https://github.com/sbidoul.png' width='32' height='32' style='border-radius:50%;' alt='sbidoul'/></a> | Queue Job Tests
 
 
 Unported addons

diff --git a/queue_job/README.rst b/queue_job/README.rst
@@ -11,7 +11,7 @@ Job Queue
    !! This file is generated by oca-gen-addon-readme !!
    !! changes will be overwritten.                   !!
    !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-   !! source digest: sha256:0851db31dd9d605b8ce69a7e3f8e5775b3a6dbeeb46f78f9ecc0aa870b0cb61a
+   !! source digest: sha256:6f668a4a03d832fe3d406bd79a70cebf7faa72c6a22371a78aa2c5627103abd4
    !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 
 .. |badge1| image:: https://img.shields.io/badge/maturity-Mature-brightgreen.png
@@ -627,21 +627,6 @@ Known issues / Roadmap
 
 - After creating a new database or installing ``queue_job`` on an
   existing database, Odoo must be restarted for the runner to detect it.
-- When Odoo shuts down normally, it waits for running jobs to finish.
-  However, when the Odoo server crashes or is otherwise force-stopped,
-  running jobs are interrupted while the runner has no chance to know
-  they have been aborted. In such situations, jobs may remain in
-  ``started`` or ``enqueued`` state after the Odoo server is halted.
-  Since the runner has no way to know if they are actually running or
-  not, and does not know for sure if it is safe to restart the jobs, it
-  does not attempt to restart them automatically. Such stale jobs
-  therefore fill the running queue and prevent other jobs to start. You
-  must therefore requeue them manually, either from the Jobs view, or by
-  running the following SQL statement *before starting Odoo*:
-
-.. code:: sql
-
-   update queue_job set state='pending' where state in ('started', 'enqueued')
 
 Changelog
 =========
@@ -715,10 +700,13 @@ promote its widespread use.
 .. |maintainer-guewen| image:: https://github.com/guewen.png?size=40px
     :target: https://github.com/guewen
     :alt: guewen
+.. |maintainer-sbidoul| image:: https://github.com/sbidoul.png?size=40px
+    :target: https://github.com/sbidoul
+    :alt: sbidoul
 
-Current `maintainer <https://odoo-community.org/page/maintainer-role>`__:
+Current `maintainers <https://odoo-community.org/page/maintainer-role>`__:
 
-|maintainer-guewen| 
+|maintainer-guewen| |maintainer-sbidoul| 
 
 This module is part of the `OCA/queue <https://github.com/OCA/queue/tree/19.0/queue_job>`_ project on GitHub.
 

diff --git a/queue_job/__manifest__.py b/queue_job/__manifest__.py
@@ -2,7 +2,7 @@
 
 {
     "name": "Job Queue",
-    "version": "19.0.1.0.0",
+    "version": "19.0.1.0.1",
     "author": "Camptocamp,ACSONE SA/NV,Odoo Community Association (OCA)",
     "website": "https://github.com/OCA/queue",
     "license": "LGPL-3",
@@ -29,7 +29,7 @@
     },
     "installable": True,
     "development_status": "Mature",
-    "maintainers": ["guewen"],
+    "maintainers": ["guewen", "sbidoul"],
     "post_init_hook": "post_init_hook",
     "post_load": "post_load",
 }
diff --git a/queue_job/controllers/main.py b/queue_job/controllers/main.py
@@ -27,15 +27,48 @@
 
 
 class RunJobController(http.Controller):
-    def _try_perform_job(self, env, job):
-        """Try to perform the job."""
+    @classmethod
+    def _acquire_job(cls, env: api.Environment, job_uuid: str) -> Job | None:
+        """Acquire a job for execution.
+
+        - make sure it is in ENQUEUED state
+        - mark it as STARTED and commit the state change
+        - acquire the job lock
+
+        If successful, return the Job instance, otherwise return None. This
+        function may fail to acquire the job if it is not in the expected state
+        or is already locked by another worker.
+        """
+        env.cr.execute(
+            "SELECT uuid FROM queue_job WHERE uuid=%s AND state=%s "
+            "FOR NO KEY UPDATE SKIP LOCKED",
+            (job_uuid, ENQUEUED),
+        )
+        if not env.cr.fetchone():
+            _logger.warning(
+                "was requested to run job %s, but it does not exist, "
+                "or is not in state %s, or is being handled by another worker",
+                job_uuid,
+                ENQUEUED,
+            )
+            return None
+        job = Job.load(env, job_uuid)
+        assert job and job.state == ENQUEUED
         job.set_started()
         job.store()
         env.cr.commit()
-        job.lock()
+        if not job.lock():
+            _logger.warning(
+                "was requested to run job %s, but it could not be locked",
+                job_uuid,
+            )
+            return None
+        return job
 
+    @classmethod
+    def _try_perform_job(cls, env, job):
+        """Try to perform the job, mark it done and commit if successful."""
         _logger.debug("%s started", job)
-
         job.perform()
         # Triggers any stored computed fields before calling 'set_done'
         # so that will be part of the 'exec_time'
@@ -46,18 +79,20 @@ def _try_perform_job(self, env, job):
         env.cr.commit()
         _logger.debug("%s done", job)
 
-    def _enqueue_dependent_jobs(self, env, job):
+    @classmethod
+    def _enqueue_dependent_jobs(cls, env, job):
         tries = 0
         while True:
             try:
-                job.enqueue_waiting()
+                with job.env.cr.savepoint():
+                    job.enqueue_waiting()
             except OperationalError as err:
                 # Automatically retry the typical transaction serialization
                 # errors
                 if err.pgcode not in PG_CONCURRENCY_ERRORS_TO_RETRY:
                     raise
                 if tries >= DEPENDS_MAX_TRIES_ON_CONCURRENCY_FAILURE:
-                    _logger.info(
+                    _logger.error(
                         "%s, maximum number of tries reached to update dependencies",
                         errorcodes.lookup(err.pgcode),
                     )
@@ -75,17 +110,8 @@ def _enqueue_dependent_jobs(self, env, job):
             else:
                 break
 
-    @http.route(
-        "/queue_job/runjob",
-        type="http",
-        auth="none",
-        save_session=False,
-        readonly=False,
-    )
-    def runjob(self, db, job_uuid, **kw):
-        http.request.session.db = db
-        env = http.request.env(user=SUPERUSER_ID)
-
+    @classmethod
+    def _runjob(cls, env: api.Environment, job: Job) -> None:
         def retry_postpone(job, message, seconds=None):
             job.env.clear()
             with Registry(job.env.cr.dbname).cursor() as new_cr:
@@ -94,26 +120,9 @@ def retry_postpone(job, message, seconds=None):
                 job.set_pending(reset_retry=False)
                 job.store()
 
-        # ensure the job to run is in the correct state and lock the record
-        env.cr.execute(
-            "SELECT state FROM queue_job WHERE uuid=%s AND state=%s FOR UPDATE",
-            (job_uuid, ENQUEUED),
-        )
-        if not env.cr.fetchone():
-            _logger.warning(
-                "was requested to run job %s, but it does not exist, "
-                "or is not in state %s",
-                job_uuid,
-                ENQUEUED,
-            )
-            return ""
-
-        job = Job.load(env, job_uuid)
-        assert job and job.state == ENQUEUED
-
         try:
             try:
-                self._try_perform_job(env, job)
+                cls._try_perform_job(env, job)
             except OperationalError as err:
                 # Automatically retry the typical transaction serialization
                 # errors
@@ -131,7 +140,6 @@ def retry_postpone(job, message, seconds=None):
             # traceback in the logs we should have the traceback when all
             # retries are exhausted
             env.cr.rollback()
-            return ""
 
         except (FailedJobError, Exception) as orig_exception:
             buff = StringIO()
@@ -141,19 +149,18 @@ def retry_postpone(job, message, seconds=None):
             job.env.clear()
             with Registry(job.env.cr.dbname).cursor() as new_cr:
                 job.env = job.env(cr=new_cr)
-                vals = self._get_failure_values(job, traceback_txt, orig_exception)
+                vals = cls._get_failure_values(job, traceback_txt, orig_exception)
                 job.set_failed(**vals)
                 job.store()
                 buff.close()
             raise
 
         _logger.debug("%s enqueue depends started", job)
-        self._enqueue_dependent_jobs(env, job)
+        cls._enqueue_dependent_jobs(env, job)
         _logger.debug("%s enqueue depends done", job)
 
-        return ""
-
-    def _get_failure_values(self, job, traceback_txt, orig_exception):
+    @classmethod
+    def _get_failure_values(cls, job, traceback_txt, orig_exception):
         """Collect relevant data from exception."""
         exception_name = orig_exception.__class__.__name__
         if hasattr(orig_exception, "__module__"):
@@ -167,6 +174,22 @@ def _get_failure_values(self, job, traceback_txt, orig_exception):
             "exc_message": exc_message,
         }
 
+    @http.route(
+        "/queue_job/runjob",
+        type="http",
+        auth="none",
+        save_session=False,
+        readonly=False,
+    )
+    def runjob(self, db, job_uuid, **kw):
+        http.request.session.db = db
+        env = http.request.env(user=SUPERUSER_ID)
+        job = self._acquire_job(env, job_uuid)
+        if not job:
+            return ""
+        self._runjob(env, job)
+        return ""
+
     # flake8: noqa: C901
     @http.route("/queue_job/create_test_job", type="http", auth="user")
     def create_test_job(
@@ -177,6 +200,7 @@ def create_test_job(
         description="Test job",
         size=1,
         failure_rate=0,
+        job_duration=0,
     ):
         if not http.request.env.user.has_group("base.group_erp_manager"):
             raise Forbidden(http.request.env._("Access Denied"))
@@ -187,6 +211,12 @@ def create_test_job(
             except (ValueError, TypeError):
                 failure_rate = 0
 
+        if job_duration is not None:
+            try:
+                job_duration = float(job_duration)
+            except (ValueError, TypeError):
+                job_duration = 0
+
         if not (0 <= failure_rate <= 1):
             raise BadRequest("failure_rate must be between 0 and 1")
 
@@ -215,6 +245,7 @@ def create_test_job(
                 channel=channel,
                 description=description,
                 failure_rate=failure_rate,
+                job_duration=job_duration,
             )
 
         if size > 1:
@@ -225,6 +256,7 @@ def create_test_job(
                 channel=channel,
                 description=description,
                 failure_rate=failure_rate,
+                job_duration=job_duration,
             )
         return ""
 
@@ -236,6 +268,7 @@ def _create_single_test_job(
         description="Test job",
         size=1,
         failure_rate=0,
+        job_duration=0,
     ):
         delayed = (
             http.request.env["queue.job"]
@@ -245,7 +278,7 @@ def _create_single_test_job(
                 channel=channel,
                 description=description,
             )
-            ._test_job(failure_rate=failure_rate)
+            ._test_job(failure_rate=failure_rate, job_duration=job_duration)
         )
         return f"job uuid: {delayed.db_record().uuid}"
 
@@ -259,6 +292,7 @@ def _create_graph_test_jobs(
         channel=None,
         description="Test job",
         failure_rate=0,
+        job_duration=0,
     ):
         model = http.request.env["queue.job"]
         current_count = 0
@@ -281,7 +315,7 @@ def _create_graph_test_jobs(
                         max_retries=max_retries,
                         channel=channel,
                         description=f"{description} #{current_count}",
-                    )._test_job(failure_rate=failure_rate)
+                    )._test_job(failure_rate=failure_rate, job_duration=job_duration)
                 )
 
             grouping = random.choice(possible_grouping_methods)

diff --git a/queue_job/job.py b/queue_job/job.py
@@ -222,7 +222,7 @@ def load_many(cls, env, job_uuids):
         recordset = cls.db_records_from_uuids(env, job_uuids)
         return {cls._load_from_db_record(record) for record in recordset}
 
-    def add_lock_record(self):
+    def add_lock_record(self) -> None:
         """
         Create row in db to be locked while the job is being performed.
         """
@@ -242,13 +242,11 @@ def add_lock_record(self):
             [self.uuid],
         )
 
-    def lock(self):
-        """
-        Lock row of job that is being performed
+    def lock(self) -> bool:
+        """Lock row of job that is being performed.
 
-        If a job cannot be locked,
-        it means that the job wasn't started,
-        a RetryableJobError is thrown.
+        Return False if a job cannot be locked: it means that the job is not in
+        STARTED state or is already locked by another worker.
         """
         self.env.cr.execute(
             """
@@ -264,18 +262,15 @@ def lock(self):
                         queue_job
                     WHERE
                         uuid = %s
-                        AND state='started'
+                        AND state = %s
                 )
-            FOR UPDATE;
+            FOR NO KEY UPDATE SKIP LOCKED;
         """,
-            [self.uuid],
+            [self.uuid, STARTED],
         )
 
         # 1 job should be locked
-        if 1 != len(self.env.cr.fetchall()):
-            raise RetryableJobError(
-                f"Trying to lock job that wasn't started, uuid: {self.uuid}"
-            )
+        return bool(self.env.cr.fetchall())
 
     @classmethod
     def _load_from_db_record(cls, job_db_record):