Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@ Available addons
----------------
addon | version | maintainers | summary
--- | --- | --- | ---
[queue_job](queue_job/) | 19.0.1.0.0 | <a href='https://github.com/guewen'><img src='https://github.com/guewen.png' width='32' height='32' style='border-radius:50%;' alt='guewen'/></a> | Job Queue
[test_queue_job](test_queue_job/) | 19.0.1.0.0 | | Queue Job Tests
[queue_job](queue_job/) | 19.0.1.0.1 | <a href='https://github.com/guewen'><img src='https://github.com/guewen.png' width='32' height='32' style='border-radius:50%;' alt='guewen'/></a> <a href='https://github.com/sbidoul'><img src='https://github.com/sbidoul.png' width='32' height='32' style='border-radius:50%;' alt='sbidoul'/></a> | Job Queue
[test_queue_job](test_queue_job/) | 19.0.1.0.1 | <a href='https://github.com/sbidoul'><img src='https://github.com/sbidoul.png' width='32' height='32' style='border-radius:50%;' alt='sbidoul'/></a> | Queue Job Tests


Unported addons
Expand Down
24 changes: 6 additions & 18 deletions queue_job/README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ Job Queue
!! This file is generated by oca-gen-addon-readme !!
!! changes will be overwritten. !!
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
!! source digest: sha256:0851db31dd9d605b8ce69a7e3f8e5775b3a6dbeeb46f78f9ecc0aa870b0cb61a
!! source digest: sha256:6f668a4a03d832fe3d406bd79a70cebf7faa72c6a22371a78aa2c5627103abd4
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

.. |badge1| image:: https://img.shields.io/badge/maturity-Mature-brightgreen.png
Expand Down Expand Up @@ -627,21 +627,6 @@ Known issues / Roadmap

- After creating a new database or installing ``queue_job`` on an
existing database, Odoo must be restarted for the runner to detect it.
- When Odoo shuts down normally, it waits for running jobs to finish.
However, when the Odoo server crashes or is otherwise force-stopped,
running jobs are interrupted while the runner has no chance to know
they have been aborted. In such situations, jobs may remain in
``started`` or ``enqueued`` state after the Odoo server is halted.
Since the runner has no way to know if they are actually running or
not, and does not know for sure if it is safe to restart the jobs, it
does not attempt to restart them automatically. Such stale jobs
therefore fill the running queue and prevent other jobs from starting. You
must therefore requeue them manually, either from the Jobs view, or by
running the following SQL statement *before starting Odoo*:

.. code:: sql

update queue_job set state='pending' where state in ('started', 'enqueued')

Changelog
=========
Expand Down Expand Up @@ -715,10 +700,13 @@ promote its widespread use.
.. |maintainer-guewen| image:: https://github.com/guewen.png?size=40px
:target: https://github.com/guewen
:alt: guewen
.. |maintainer-sbidoul| image:: https://github.com/sbidoul.png?size=40px
:target: https://github.com/sbidoul
:alt: sbidoul

Current `maintainer <https://odoo-community.org/page/maintainer-role>`__:
Current `maintainers <https://odoo-community.org/page/maintainer-role>`__:

|maintainer-guewen|
|maintainer-guewen| |maintainer-sbidoul|

This module is part of the `OCA/queue <https://github.com/OCA/queue/tree/19.0/queue_job>`_ project on GitHub.

Expand Down
4 changes: 2 additions & 2 deletions queue_job/__manifest__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

{
"name": "Job Queue",
"version": "19.0.1.0.0",
"version": "19.0.1.0.1",
"author": "Camptocamp,ACSONE SA/NV,Odoo Community Association (OCA)",
"website": "https://github.com/OCA/queue",
"license": "LGPL-3",
Expand All @@ -29,7 +29,7 @@
},
"installable": True,
"development_status": "Mature",
"maintainers": ["guewen"],
"maintainers": ["guewen", "sbidoul"],
"post_init_hook": "post_init_hook",
"post_load": "post_load",
}
122 changes: 78 additions & 44 deletions queue_job/controllers/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,15 +27,48 @@


class RunJobController(http.Controller):
def _try_perform_job(self, env, job):
"""Try to perform the job."""
@classmethod
def _acquire_job(cls, env: api.Environment, job_uuid: str) -> Job | None:
"""Acquire a job for execution.

- make sure it is in ENQUEUED state
- mark it as STARTED and commit the state change
- acquire the job lock

If successful, return the Job instance, otherwise return None. This
function may fail to acquire the job if it is not in the expected state
or is already locked by another worker.
"""
env.cr.execute(
"SELECT uuid FROM queue_job WHERE uuid=%s AND state=%s "
"FOR NO KEY UPDATE SKIP LOCKED",
(job_uuid, ENQUEUED),
)
if not env.cr.fetchone():
_logger.warning(
"was requested to run job %s, but it does not exist, "
"or is not in state %s, or is being handled by another worker",
job_uuid,
ENQUEUED,
)
return None
job = Job.load(env, job_uuid)
assert job and job.state == ENQUEUED
job.set_started()
job.store()
env.cr.commit()
job.lock()
if not job.lock():
_logger.warning(
"was requested to run job %s, but it could not be locked",
job_uuid,
)
return None
return job

@classmethod
def _try_perform_job(cls, env, job):
"""Try to perform the job, mark it done and commit if successful."""
_logger.debug("%s started", job)

job.perform()
# Trigger any stored computed fields before calling 'set_done'
# so that their computation will be part of the 'exec_time'
Expand All @@ -46,18 +79,20 @@ def _try_perform_job(self, env, job):
env.cr.commit()
_logger.debug("%s done", job)

def _enqueue_dependent_jobs(self, env, job):
@classmethod
def _enqueue_dependent_jobs(cls, env, job):
tries = 0
while True:
try:
job.enqueue_waiting()
with job.env.cr.savepoint():
job.enqueue_waiting()
except OperationalError as err:
# Automatically retry the typical transaction serialization
# errors
if err.pgcode not in PG_CONCURRENCY_ERRORS_TO_RETRY:
raise
if tries >= DEPENDS_MAX_TRIES_ON_CONCURRENCY_FAILURE:
_logger.info(
_logger.error(
"%s, maximum number of tries reached to update dependencies",
errorcodes.lookup(err.pgcode),
)
Expand All @@ -75,17 +110,8 @@ def _enqueue_dependent_jobs(self, env, job):
else:
break

@http.route(
"/queue_job/runjob",
type="http",
auth="none",
save_session=False,
readonly=False,
)
def runjob(self, db, job_uuid, **kw):
http.request.session.db = db
env = http.request.env(user=SUPERUSER_ID)

@classmethod
def _runjob(cls, env: api.Environment, job: Job) -> None:
def retry_postpone(job, message, seconds=None):
job.env.clear()
with Registry(job.env.cr.dbname).cursor() as new_cr:
Expand All @@ -94,26 +120,9 @@ def retry_postpone(job, message, seconds=None):
job.set_pending(reset_retry=False)
job.store()

# ensure the job to run is in the correct state and lock the record
env.cr.execute(
"SELECT state FROM queue_job WHERE uuid=%s AND state=%s FOR UPDATE",
(job_uuid, ENQUEUED),
)
if not env.cr.fetchone():
_logger.warning(
"was requested to run job %s, but it does not exist, "
"or is not in state %s",
job_uuid,
ENQUEUED,
)
return ""

job = Job.load(env, job_uuid)
assert job and job.state == ENQUEUED

try:
try:
self._try_perform_job(env, job)
cls._try_perform_job(env, job)
except OperationalError as err:
# Automatically retry the typical transaction serialization
# errors
Expand All @@ -131,7 +140,6 @@ def retry_postpone(job, message, seconds=None):
# traceback in the logs we should have the traceback when all
# retries are exhausted
env.cr.rollback()
return ""

except (FailedJobError, Exception) as orig_exception:
buff = StringIO()
Expand All @@ -141,19 +149,18 @@ def retry_postpone(job, message, seconds=None):
job.env.clear()
with Registry(job.env.cr.dbname).cursor() as new_cr:
job.env = job.env(cr=new_cr)
vals = self._get_failure_values(job, traceback_txt, orig_exception)
vals = cls._get_failure_values(job, traceback_txt, orig_exception)
job.set_failed(**vals)
job.store()
buff.close()
raise

_logger.debug("%s enqueue depends started", job)
self._enqueue_dependent_jobs(env, job)
cls._enqueue_dependent_jobs(env, job)
_logger.debug("%s enqueue depends done", job)

return ""

def _get_failure_values(self, job, traceback_txt, orig_exception):
@classmethod
def _get_failure_values(cls, job, traceback_txt, orig_exception):
"""Collect relevant data from exception."""
exception_name = orig_exception.__class__.__name__
if hasattr(orig_exception, "__module__"):
Expand All @@ -167,6 +174,22 @@ def _get_failure_values(self, job, traceback_txt, orig_exception):
"exc_message": exc_message,
}

@http.route(
    "/queue_job/runjob",
    type="http",
    auth="none",
    save_session=False,
    readonly=False,
)
def runjob(self, db, job_uuid, **kw):
    """HTTP entry point that runs one job identified by ``job_uuid``.

    Binds the session to ``db``, builds a superuser environment, then
    delegates to ``_acquire_job`` and, when a job was acquired, to
    ``_runjob``. The response body is always an empty string, whether
    or not a job was acquired.
    """
    http.request.session.db = db
    superuser_env = http.request.env(user=SUPERUSER_ID)
    acquired = self._acquire_job(superuser_env, job_uuid)
    if acquired:
        # Only run when acquisition succeeded; otherwise there is
        # nothing to do for this request.
        self._runjob(superuser_env, acquired)
    return ""

# flake8: noqa: C901
@http.route("/queue_job/create_test_job", type="http", auth="user")
def create_test_job(
Expand All @@ -177,6 +200,7 @@ def create_test_job(
description="Test job",
size=1,
failure_rate=0,
job_duration=0,
):
if not http.request.env.user.has_group("base.group_erp_manager"):
raise Forbidden(http.request.env._("Access Denied"))
Expand All @@ -187,6 +211,12 @@ def create_test_job(
except (ValueError, TypeError):
failure_rate = 0

if job_duration is not None:
try:
job_duration = float(job_duration)
except (ValueError, TypeError):
job_duration = 0

if not (0 <= failure_rate <= 1):
raise BadRequest("failure_rate must be between 0 and 1")

Expand Down Expand Up @@ -215,6 +245,7 @@ def create_test_job(
channel=channel,
description=description,
failure_rate=failure_rate,
job_duration=job_duration,
)

if size > 1:
Expand All @@ -225,6 +256,7 @@ def create_test_job(
channel=channel,
description=description,
failure_rate=failure_rate,
job_duration=job_duration,
)
return ""

Expand All @@ -236,6 +268,7 @@ def _create_single_test_job(
description="Test job",
size=1,
failure_rate=0,
job_duration=0,
):
delayed = (
http.request.env["queue.job"]
Expand All @@ -245,7 +278,7 @@ def _create_single_test_job(
channel=channel,
description=description,
)
._test_job(failure_rate=failure_rate)
._test_job(failure_rate=failure_rate, job_duration=job_duration)
)
return f"job uuid: {delayed.db_record().uuid}"

Expand All @@ -259,6 +292,7 @@ def _create_graph_test_jobs(
channel=None,
description="Test job",
failure_rate=0,
job_duration=0,
):
model = http.request.env["queue.job"]
current_count = 0
Expand All @@ -281,7 +315,7 @@ def _create_graph_test_jobs(
max_retries=max_retries,
channel=channel,
description=f"{description} #{current_count}",
)._test_job(failure_rate=failure_rate)
)._test_job(failure_rate=failure_rate, job_duration=job_duration)
)

grouping = random.choice(possible_grouping_methods)
Expand Down
23 changes: 9 additions & 14 deletions queue_job/job.py
Original file line number Diff line number Diff line change
Expand Up @@ -222,7 +222,7 @@ def load_many(cls, env, job_uuids):
recordset = cls.db_records_from_uuids(env, job_uuids)
return {cls._load_from_db_record(record) for record in recordset}

def add_lock_record(self):
def add_lock_record(self) -> None:
"""
Create row in db to be locked while the job is being performed.
"""
Expand All @@ -242,13 +242,11 @@ def add_lock_record(self):
[self.uuid],
)

def lock(self):
"""
Lock row of job that is being performed
def lock(self) -> bool:
"""Lock row of job that is being performed.

If a job cannot be locked,
it means that the job wasn't started,
a RetryableJobError is thrown.
Return False if a job cannot be locked: it means that the job is not in
STARTED state or is already locked by another worker.
"""
self.env.cr.execute(
"""
Expand All @@ -264,18 +262,15 @@ def lock(self):
queue_job
WHERE
uuid = %s
AND state='started'
AND state = %s
)
FOR UPDATE;
FOR NO KEY UPDATE SKIP LOCKED;
""",
[self.uuid],
[self.uuid, STARTED],
)

# 1 job should be locked
if 1 != len(self.env.cr.fetchall()):
raise RetryableJobError(
f"Trying to lock job that wasn't started, uuid: {self.uuid}"
)
return bool(self.env.cr.fetchall())

@classmethod
def _load_from_db_record(cls, job_db_record):
Expand Down
Loading