diff --git a/.riot/requirements/173f5b3.txt b/.riot/requirements/173f5b3.txt new file mode 100644 index 00000000000..83216fb74bd --- /dev/null +++ b/.riot/requirements/173f5b3.txt @@ -0,0 +1,36 @@ +# +# This file is autogenerated by pip-compile with Python 3.14 +# by the following command: +# +# pip-compile --allow-unsafe --no-annotate .riot/requirements/173f5b3.in +# +attrs==25.4.0 +coverage[toml]==7.12.0 +gevent==25.9.1 +greenlet==3.3.0 +gunicorn[gevent]==23.0.0 +hypothesis==6.45.0 +iniconfig==2.3.0 +jsonschema==4.25.1 +jsonschema-specifications==2025.9.1 +mock==5.2.0 +opentracing==2.4.0 +packaging==25.0 +pluggy==1.6.0 +protobuf==6.33.1 +py-cpuinfo==8.0.0 +pygments==2.19.2 +pytest==9.0.1 +pytest-asyncio==0.21.1 +pytest-benchmark==5.2.3 +pytest-cov==7.0.0 +pytest-cpp==2.6.0 +pytest-mock==3.15.1 +pytest-randomly==4.0.1 +referencing==0.37.0 +rpds-py==0.30.0 +sortedcontainers==2.4.0 +uwsgi==2.0.31 +zope-event==6.1 +zope-interface==8.1.1 +zstandard==0.25.0 diff --git a/.riot/requirements/1a4c947.txt b/.riot/requirements/1a4c947.txt new file mode 100644 index 00000000000..ae55f5306be --- /dev/null +++ b/.riot/requirements/1a4c947.txt @@ -0,0 +1,31 @@ +# +# This file is autogenerated by pip-compile with Python 3.14 +# by the following command: +# +# pip-compile --allow-unsafe --no-annotate .riot/requirements/1a4c947.in +# +attrs==25.4.0 +coverage[toml]==7.12.0 +gunicorn==23.0.0 +hypothesis==6.45.0 +iniconfig==2.3.0 +jsonschema==4.25.1 +jsonschema-specifications==2025.9.1 +mock==5.2.0 +opentracing==2.4.0 +packaging==25.0 +pluggy==1.6.0 +protobuf==6.33.1 +py-cpuinfo==8.0.0 +pygments==2.19.2 +pytest==9.0.1 +pytest-asyncio==0.21.1 +pytest-benchmark==5.2.3 +pytest-cov==7.0.0 +pytest-cpp==2.6.0 +pytest-mock==3.15.1 +pytest-randomly==4.0.1 +referencing==0.37.0 +rpds-py==0.30.0 +sortedcontainers==2.4.0 +zstandard==0.25.0 diff --git a/.riot/requirements/72ed1ec.txt b/.riot/requirements/72ed1ec.txt new file mode 100644 index 00000000000..453ed140c3d --- /dev/null +++ b/.riot/requirements/72ed1ec.txt @@ -0,0 +1,32 @@ +# +# This file is autogenerated by pip-compile with Python 3.14 +# by the following command: +# +# pip-compile --allow-unsafe --no-annotate .riot/requirements/72ed1ec.in +# +attrs==25.4.0 +coverage[toml]==7.12.0 +gunicorn==23.0.0 +hypothesis==6.45.0 +iniconfig==2.3.0 +jsonschema==4.25.1 +jsonschema-specifications==2025.9.1 +mock==5.2.0 +opentracing==2.4.0 +packaging==25.0 +pluggy==1.6.0 +protobuf==6.33.1 +py-cpuinfo==8.0.0 +pygments==2.19.2 +pytest==9.0.1 +pytest-asyncio==0.21.1 +pytest-benchmark==5.2.3 +pytest-cov==7.0.0 +pytest-cpp==2.6.0 +pytest-mock==3.15.1 +pytest-randomly==4.0.1 +referencing==0.37.0 +rpds-py==0.30.0 +sortedcontainers==2.4.0 +uwsgi==2.0.31 +zstandard==0.25.0 diff --git a/ddtrace/internal/datadog/profiling/stack_v2/echion/echion/cpython/tasks.h b/ddtrace/internal/datadog/profiling/stack_v2/echion/echion/cpython/tasks.h index dbdfada1832..2d88e67821b 100644 --- a/ddtrace/internal/datadog/profiling/stack_v2/echion/echion/cpython/tasks.h +++ b/ddtrace/internal/datadog/profiling/stack_v2/echion/echion/cpython/tasks.h @@ -11,9 +11,18 @@ #include #define Py_BUILD_CORE -#if PY_VERSION_HEX >= 0x030d0000 +#if PY_VERSION_HEX >= 0x030e0000 +#include +#include +#include +#include +#include +#include +#include +#elif PY_VERSION_HEX >= 0x030d0000 #include #else +#include #include #include #endif // PY_VERSION_HEX >= 0x030d0000 @@ -38,7 +47,32 @@ extern "C" STATE_FINISHED } fut_state; -#if PY_VERSION_HEX >= 0x030d0000 +#if PY_VERSION_HEX >= 0x030e0000 +// Python 3.14+: New fields added (awaited_by, is_task, awaited_by_is_set) +#define FutureObj_HEAD(prefix) \ + PyObject_HEAD PyObject* prefix##_loop; \ + PyObject* prefix##_callback0; \ + PyObject* prefix##_context0; \ + PyObject* prefix##_callbacks; \ + PyObject* prefix##_exception; \ + PyObject* prefix##_exception_tb; \ + PyObject* prefix##_result; \ + PyObject* prefix##_source_tb; \ + PyObject* prefix##_cancel_msg; \ + PyObject* prefix##_cancelled_exc; \ + PyObject* prefix##_awaited_by; \ + fut_state prefix##_state; \ + /* Used by profilers to make traversing the stack from an external \ + process faster. */ \ + char prefix##_is_task; \ + char prefix##_awaited_by_is_set; \ + /* These bitfields need to be at the end of the struct \ + so that these and bitfields from TaskObj are contiguous. \ + */ \ + unsigned prefix##_log_tb : 1; \ + unsigned prefix##_blocking : 1; + +#elif PY_VERSION_HEX >= 0x030d0000 #define FutureObj_HEAD(prefix) \ PyObject_HEAD PyObject* prefix##_loop; \ PyObject* prefix##_callback0; \ @@ -131,7 +165,24 @@ extern "C" FutureObj_HEAD(future) } FutureObj; -#if PY_VERSION_HEX >= 0x030d0000 +#if PY_VERSION_HEX >= 0x030e0000 + // Python 3.14+: TaskObj includes task_node for linked-list storage + typedef struct + { + FutureObj_HEAD(task) unsigned task_must_cancel : 1; + unsigned task_log_destroy_pending : 1; + int task_num_cancels_requested; + PyObject* task_fut_waiter; + PyObject* task_coro; + PyObject* task_name; + PyObject* task_context; + struct llist_node task_node; +#ifdef Py_GIL_DISABLED + // thread id of the thread where this task was created + uintptr_t task_tid; +#endif + } TaskObj; +#elif PY_VERSION_HEX >= 0x030d0000 typedef struct { FutureObj_HEAD(task) unsigned task_must_cancel : 1; @@ -173,7 +224,67 @@ extern "C" #define RESUME_QUICK INSTRUMENTED_RESUME #endif -#if PY_VERSION_HEX >= 0x030d0000 +#if PY_VERSION_HEX >= 0x030e0000 + // Python 3.14+: Use stackpointer and _PyStackRef + + inline PyObject* PyGen_yf(PyGenObject* gen, PyObject* frame_addr) + { + if (gen->gi_frame_state != FRAME_SUSPENDED_YIELD_FROM) { + return nullptr; + } + + _PyInterpreterFrame frame; + if (copy_type(frame_addr, frame)) { + return nullptr; + } + + // CPython asserts the following: + // assert(f->stackpointer > f->localsplus + _PyFrame_GetCode(f)->co_nlocalsplus); + // assert(!PyStackRef_IsNull(f->stackpointer[-1])); + + // Though we have to pay the price of copying the code object, we need + // to do this to catch the case where the stack is empty, as accessing + // frame.stackpointer[-1] would be an undefined behavior. + // This is necessary as frame.stacktop is removed in 3.14. + PyCodeObject code; + auto code_addr = reinterpret_cast(BITS_TO_PTR_MASKED(frame.f_executable)); + if (copy_type(code_addr, code)) { + return nullptr; + } + + uintptr_t frame_addr_uint = reinterpret_cast(frame_addr); + uintptr_t localsplus_addr = frame_addr_uint + offsetof(_PyInterpreterFrame, localsplus); + // This computes f->localsplus + code.co_nlocalsplus. + uintptr_t stackbase_addr = localsplus_addr + code.co_nlocalsplus * sizeof(_PyStackRef); + + uintptr_t stackpointer_addr = reinterpret_cast(frame.stackpointer); + // We want stackpointer_addr to be greater than the stackbase_addr, + // that is, the stack is not empty. + if (stackpointer_addr <= stackbase_addr) { + return nullptr; + } + + // We can also calculate stacktop and check that it is within a reasonable range. + // Similar to 3.13's stacktop check below. + int stacktop = (int)((stackpointer_addr - stackbase_addr) / sizeof(_PyStackRef)); + + if (stacktop < 1 || stacktop > MAX_STACK_SIZE) { + return nullptr; + } + + // Read the top of stack directly from remote memory + // This is equivalent to CPython's frame.stackpointer[-1]. + _PyStackRef top_ref; + if (copy_type(reinterpret_cast(stackpointer_addr - sizeof(_PyStackRef)), top_ref)) { + return nullptr; + } + + // Extract PyObject* from _PyStackRef.bits + // Per Python 3.14 release notes (gh-123923): clear LSB to recover PyObject* pointer + return BITS_TO_PTR_MASKED(top_ref); + } + +#elif PY_VERSION_HEX >= 0x030d0000 inline PyObject* PyGen_yf(PyGenObject* gen, PyObject* frame_addr) { diff --git a/ddtrace/internal/datadog/profiling/stack_v2/echion/echion/frame.h b/ddtrace/internal/datadog/profiling/stack_v2/echion/echion/frame.h index 1ffd0f7b5c3..1092d15d52f 100644 --- a/ddtrace/internal/datadog/profiling/stack_v2/echion/echion/frame.h +++ b/ddtrace/internal/datadog/profiling/stack_v2/echion/echion/frame.h @@ -14,11 +14,10 @@ #undef _PyGC_FINALIZED #endif #include -#if PY_VERSION_HEX >= 0x030d0000 +#if PY_VERSION_HEX >= 0x030e0000 #define Py_BUILD_CORE -#include -#endif // PY_VERSION_HEX >= 0x030d0000 -#if PY_VERSION_HEX >= 0x030b0000 +#include +#elif PY_VERSION_HEX >= 0x030b0000 #define Py_BUILD_CORE #include #endif diff --git a/ddtrace/internal/datadog/profiling/stack_v2/echion/echion/greenlets.h b/ddtrace/internal/datadog/profiling/stack_v2/echion/echion/greenlets.h index 997171f33c7..4aba2d9961f 100644 --- a/ddtrace/internal/datadog/profiling/stack_v2/echion/echion/greenlets.h +++ b/ddtrace/internal/datadog/profiling/stack_v2/echion/echion/greenlets.h @@ -7,6 +7,10 @@ #include #define Py_BUILD_CORE +#if PY_VERSION_HEX >= 0x030e0000 +#include +#endif + #include #include diff --git a/ddtrace/internal/datadog/profiling/stack_v2/echion/echion/state.h b/ddtrace/internal/datadog/profiling/stack_v2/echion/echion/state.h index 62657538fd6..bc033bb8e66 100644 --- a/ddtrace/internal/datadog/profiling/stack_v2/echion/echion/state.h +++ b/ddtrace/internal/datadog/profiling/stack_v2/echion/echion/state.h @@ -17,6 +17,9 @@ #endif #define Py_BUILD_CORE #include +#if PY_VERSION_HEX >= 0x030e0000 +#include +#endif #include diff --git a/ddtrace/internal/datadog/profiling/stack_v2/echion/echion/tasks.h b/ddtrace/internal/datadog/profiling/stack_v2/echion/echion/tasks.h index 3efb4df65db..e3fa35ac120 100644 --- a/ddtrace/internal/datadog/profiling/stack_v2/echion/echion/tasks.h +++ b/ddtrace/internal/datadog/profiling/stack_v2/echion/echion/tasks.h @@ -4,19 +4,22 @@ #pragma once -#include - #define PY_SSIZE_T_CLEAN #include -#include +#include #if PY_VERSION_HEX >= 0x030b0000 #include #define Py_BUILD_CORE -#if PY_VERSION_HEX >= 0x030d0000 +#include +#if PY_VERSION_HEX >= 0x030e0000 +#include +#include +#elif PY_VERSION_HEX >= 0x030d0000 #include #else +#include #include #endif // PY_VERSION_HEX >= 0x030d0000 #else @@ -275,67 +278,6 @@ TaskInfo::current(PyObject* loop) return TaskInfo::create(reinterpret_cast(task)); } -// ---------------------------------------------------------------------------- -// TODO: Make this a "for_each_task" function? -[[nodiscard]] inline Result> -get_all_tasks(PyObject* loop) -{ - std::vector tasks; - if (loop == NULL) - return tasks; - - auto maybe_scheduled_tasks_set = MirrorSet::create(asyncio_scheduled_tasks); - if (!maybe_scheduled_tasks_set) { - return ErrorKind::TaskInfoError; - } - - auto scheduled_tasks_set = std::move(*maybe_scheduled_tasks_set); - auto maybe_scheduled_tasks = scheduled_tasks_set.as_unordered_set(); - if (!maybe_scheduled_tasks) { - return ErrorKind::TaskInfoError; - } - - auto scheduled_tasks = std::move(*maybe_scheduled_tasks); - for (auto task_wr_addr : scheduled_tasks) { - PyWeakReference task_wr; - if (copy_type(task_wr_addr, task_wr)) - continue; - - auto maybe_task_info = TaskInfo::create(reinterpret_cast(task_wr.wr_object)); - if (maybe_task_info) { - if ((*maybe_task_info)->loop == loop) { - tasks.push_back(std::move(*maybe_task_info)); - } - } - } - - if (asyncio_eager_tasks != NULL) { - auto maybe_eager_tasks_set = MirrorSet::create(asyncio_eager_tasks); - if (!maybe_eager_tasks_set) { - return ErrorKind::TaskInfoError; - } - - auto eager_tasks_set = std::move(*maybe_eager_tasks_set); - - auto maybe_eager_tasks = eager_tasks_set.as_unordered_set(); - if (!maybe_eager_tasks) { - return ErrorKind::TaskInfoError; - } - - auto eager_tasks = std::move(*maybe_eager_tasks); - for (auto task_addr : eager_tasks) { - auto maybe_task_info = TaskInfo::create(reinterpret_cast(task_addr)); - if (maybe_task_info) { - if ((*maybe_task_info)->loop == loop) { - tasks.push_back(std::move(*maybe_task_info)); - } - } - } - } - - return tasks; -} - // ---------------------------------------------------------------------------- inline std::vector> current_tasks; diff --git a/ddtrace/internal/datadog/profiling/stack_v2/echion/echion/threads.h b/ddtrace/internal/datadog/profiling/stack_v2/echion/echion/threads.h index bca5ca7f1a5..4b1ba9da22c 100644 --- a/ddtrace/internal/datadog/profiling/stack_v2/echion/echion/threads.h +++ b/ddtrace/internal/datadog/profiling/stack_v2/echion/echion/threads.h @@ -7,6 +7,10 @@ #include #define Py_BUILD_CORE +#if PY_VERSION_HEX >= 0x030e0000 +#include +#endif + #include #include #include @@ -46,6 +50,7 @@ class ThreadInfo microsecond_t cpu_time; uintptr_t asyncio_loop = 0; + uintptr_t tstate_addr = 0; // Remote address of PyThreadState for accessing asyncio_tasks_head [[nodiscard]] Result update_cpu_time(); bool is_running(); @@ -101,8 +106,15 @@ class ThreadInfo }; private: - [[nodiscard]] Result unwind_tasks(); + [[nodiscard]] Result unwind_tasks(PyThreadState*); void unwind_greenlets(PyThreadState*, unsigned long); + [[nodiscard]] Result> get_all_tasks(PyThreadState* tstate); +#if PY_VERSION_HEX >= 0x030e0000 + [[nodiscard]] Result get_tasks_from_thread_linked_list(std::vector& tasks); + [[nodiscard]] Result get_tasks_from_interpreter_linked_list(PyThreadState* tstate, + std::vector& tasks); + [[nodiscard]] Result get_tasks_from_linked_list(uintptr_t head_addr, std::vector& tasks); +#endif }; inline Result @@ -192,7 +204,7 @@ ThreadInfo::unwind(PyThreadState* tstate) if (asyncio_loop) { // unwind_tasks returns a [[nodiscard]] Result. // We cast it to void to ignore failures. - (void)unwind_tasks(); + (void)unwind_tasks(tstate); } // We make the assumption that gevent and asyncio are not mixed @@ -203,7 +215,7 @@ ThreadInfo::unwind(PyThreadState* tstate) // ---------------------------------------------------------------------------- inline Result -ThreadInfo::unwind_tasks() +ThreadInfo::unwind_tasks(PyThreadState* tstate) { std::vector leaf_tasks; std::unordered_set parent_tasks; @@ -211,7 +223,7 @@ ThreadInfo::unwind_tasks() std::unordered_map origin_map; // Indexed by task origin static std::unordered_set previous_task_objects; - auto maybe_all_tasks = get_all_tasks(reinterpret_cast(asyncio_loop)); + auto maybe_all_tasks = get_all_tasks(tstate); if (!maybe_all_tasks) { return ErrorKind::TaskInfoError; } @@ -257,7 +269,7 @@ ThreadInfo::unwind_tasks() for (auto& task : all_tasks) { origin_map.emplace(task->origin, std::ref(*task)); - if (task->waiter != NULL) + if (task->waiter != nullptr) waitee_map.emplace(task->waiter->origin, std::ref(*task)); else if (parent_tasks.find(task->origin) == parent_tasks.end()) { leaf_tasks.push_back(std::ref(*task)); @@ -353,6 +365,235 @@ ThreadInfo::unwind_tasks() return Result::ok(); } +// ---------------------------------------------------------------------------- +#if PY_VERSION_HEX >= 0x030e0000 +inline Result +ThreadInfo::get_tasks_from_thread_linked_list(std::vector& tasks) +{ + if (this->tstate_addr == 0 || this->asyncio_loop == 0) { + return ErrorKind::TaskInfoError; + } + + // Calculate thread state's asyncio_tasks_head remote address + // Note: Since 3.13+, every PyThreadState is actually allocated as a _PyThreadStateImpl. + // We use PyThreadState* everywhere and cast to _PyThreadStateImpl* only when we need + // to access asyncio_tasks_head (which is only available in Python 3.14+). + // Since tstate_addr is a remote address, we calculate the offset and add it to the address. + // get_tasks_from_linked_list will handle copying the head node from remote memory internally. + constexpr size_t asyncio_tasks_head_offset = offsetof(_PyThreadStateImpl, asyncio_tasks_head); + uintptr_t head_addr = this->tstate_addr + asyncio_tasks_head_offset; + + return get_tasks_from_linked_list(head_addr, tasks); +} + +inline Result +ThreadInfo::get_tasks_from_interpreter_linked_list(PyThreadState* tstate, std::vector& tasks) +{ + if (tstate == nullptr || tstate->interp == nullptr || this->asyncio_loop == 0) { + return ErrorKind::TaskInfoError; + } + + constexpr size_t asyncio_tasks_head_offset = offsetof(PyInterpreterState, asyncio_tasks_head); + uintptr_t head_addr = reinterpret_cast(tstate->interp) + asyncio_tasks_head_offset; + + return get_tasks_from_linked_list(head_addr, tasks); +} + +inline Result +ThreadInfo::get_tasks_from_linked_list(uintptr_t head_addr, std::vector& tasks) +{ + if (head_addr == 0 || this->asyncio_loop == 0) { + return ErrorKind::TaskInfoError; + } + + // Copy head node struct from remote memory to local memory + struct llist_node head_node_local; + if (copy_type(reinterpret_cast(head_addr), head_node_local)) { + return ErrorKind::TaskInfoError; + } + + // Check if list is empty (head points to itself in circular list) + uintptr_t head_addr_uint = head_addr; + uintptr_t next_as_uint = reinterpret_cast(head_node_local.next); + uintptr_t prev_as_uint = reinterpret_cast(head_node_local.prev); + if (next_as_uint == head_addr_uint && prev_as_uint == head_addr_uint) { + return Result::ok(); + } + + struct llist_node current_node = head_node_local; // Start with head node + uintptr_t current_node_addr = head_addr; // Address of current node + + // Copied from CPython's _remote_debugging_module.c: MAX_ITERATIONS + const size_t MAX_ITERATIONS = 1 << 16; + size_t iteration_count = 0; + + // Iterate over linked-list. The linked list is circular, so we stop + // when we're back at head. + while (reinterpret_cast(current_node.next) != head_addr_uint) { + // Safety: prevent infinite loops + if (++iteration_count > MAX_ITERATIONS) { + return ErrorKind::TaskInfoError; + } + + if (current_node.next == nullptr) { + return ErrorKind::TaskInfoError; // nullptr pointer - invalid list + } + + uintptr_t next_node_addr = reinterpret_cast(current_node.next); + + // Calculate task_addr from current_node.next + size_t task_node_offset_val = offsetof(TaskObj, task_node); + uintptr_t task_addr_uint = next_node_addr - task_node_offset_val; + + // Create TaskInfo for the task + auto maybe_task_info = TaskInfo::create(reinterpret_cast(task_addr_uint)); + if (maybe_task_info) { + auto& task_info = *maybe_task_info; + if (task_info->loop == reinterpret_cast(this->asyncio_loop)) { + tasks.push_back(std::move(task_info)); + } + } + + // Read next node from current_node.next into current_node + if (copy_type(reinterpret_cast(next_node_addr), current_node)) { + return ErrorKind::TaskInfoError; // Failed to read next node + } + current_node_addr = next_node_addr; // Update address for next iteration + } + + return Result::ok(); +} + +inline Result> +ThreadInfo::get_all_tasks(PyThreadState* tstate) +{ + std::vector tasks; + if (this->asyncio_loop == 0) + return tasks; + + // Python 3.14+: Native tasks are in linked-list per thread AND per interpreter + // CPython iterates over both: + // 1. Per-thread list: tstate->asyncio_tasks_head (active tasks) + // 2. Per-interpreter list: interp->asyncio_tasks_head (lingering tasks) + // First, get tasks from this thread's linked-list (if tstate_addr is set) + // Note: We continue processing even if one source fails to maximize partial results + if (tstate != nullptr && this->tstate_addr != 0) { + (void)get_tasks_from_thread_linked_list(tasks); + + // Second, get tasks from interpreter's linked-list (lingering tasks) + (void)get_tasks_from_interpreter_linked_list(tstate, tasks); + } + + // Handle third-party tasks from Python _scheduled_tasks WeakSet + // In Python 3.14+, _scheduled_tasks is a Python-level weakref.WeakSet() that only contains + // tasks that don't inherit from asyncio.Task. Native asyncio.Task instances are stored + // in linked-lists (handled above) and are NOT added to _scheduled_tasks. + // This is typically empty in practice, but we handle it for completeness. + if (asyncio_scheduled_tasks != nullptr) { + if (auto maybe_scheduled_tasks_set = MirrorSet::create(asyncio_scheduled_tasks)) { + auto scheduled_tasks_set = std::move(*maybe_scheduled_tasks_set); + if (auto maybe_scheduled_tasks = scheduled_tasks_set.as_unordered_set()) { + auto scheduled_tasks = std::move(*maybe_scheduled_tasks); + for (auto task_addr : scheduled_tasks) { + // In WeakSet.data (set), elements are the Task objects themselves + auto maybe_task_info = TaskInfo::create(reinterpret_cast(task_addr)); + if (maybe_task_info && + (*maybe_task_info)->loop == reinterpret_cast(this->asyncio_loop)) { + tasks.push_back(std::move(*maybe_task_info)); + } + } + } + } + } + + if (asyncio_eager_tasks != NULL) { + auto maybe_eager_tasks_set = MirrorSet::create(asyncio_eager_tasks); + if (!maybe_eager_tasks_set) { + return ErrorKind::TaskInfoError; + } + + auto eager_tasks_set = std::move(*maybe_eager_tasks_set); + + auto maybe_eager_tasks = eager_tasks_set.as_unordered_set(); + if (!maybe_eager_tasks) { + return ErrorKind::TaskInfoError; + } + + auto eager_tasks = std::move(*maybe_eager_tasks); + for (auto task_addr : eager_tasks) { + auto maybe_task_info = TaskInfo::create(reinterpret_cast(task_addr)); + if (maybe_task_info) { + if ((*maybe_task_info)->loop == reinterpret_cast(this->asyncio_loop)) { + tasks.push_back(std::move(*maybe_task_info)); + } + } + } + } + + return tasks; +} +#else +// Pre-Python 3.14: get_all_tasks uses WeakSet approach +inline Result> +ThreadInfo::get_all_tasks(PyThreadState*) +{ + std::vector tasks; + if (this->asyncio_loop == 0) + return tasks; + + auto maybe_scheduled_tasks_set = MirrorSet::create(asyncio_scheduled_tasks); + if (!maybe_scheduled_tasks_set) { + return ErrorKind::TaskInfoError; + } + + auto scheduled_tasks_set = std::move(*maybe_scheduled_tasks_set); + auto maybe_scheduled_tasks = scheduled_tasks_set.as_unordered_set(); + if (!maybe_scheduled_tasks) { + return ErrorKind::TaskInfoError; + } + + auto scheduled_tasks = std::move(*maybe_scheduled_tasks); + for (auto task_wr_addr : scheduled_tasks) { + PyWeakReference task_wr; + if (copy_type(task_wr_addr, task_wr)) + continue; + + auto maybe_task_info = TaskInfo::create(reinterpret_cast(task_wr.wr_object)); + if (maybe_task_info) { + if ((*maybe_task_info)->loop == reinterpret_cast(this->asyncio_loop)) { + tasks.push_back(std::move(*maybe_task_info)); + } + } + } + + if (asyncio_eager_tasks != NULL) { + auto maybe_eager_tasks_set = MirrorSet::create(asyncio_eager_tasks); + if (!maybe_eager_tasks_set) { + return ErrorKind::TaskInfoError; + } + + auto eager_tasks_set = std::move(*maybe_eager_tasks_set); + + auto maybe_eager_tasks = eager_tasks_set.as_unordered_set(); + if (!maybe_eager_tasks) { + return ErrorKind::TaskInfoError; + } + + auto eager_tasks = std::move(*maybe_eager_tasks); + for (auto task_addr : eager_tasks) { + auto maybe_task_info = TaskInfo::create(reinterpret_cast(task_addr)); + if (maybe_task_info) { + if ((*maybe_task_info)->loop == reinterpret_cast(this->asyncio_loop)) { + tasks.push_back(std::move(*maybe_task_info)); + } + } + } + } + + return tasks; +} +#endif // PY_VERSION_HEX >= 0x030e0000 + // ---------------------------------------------------------------------------- inline void ThreadInfo::unwind_greenlets(PyThreadState* tstate, unsigned long cur_native_id) @@ -493,8 +734,10 @@ ThreadInfo::sample(int64_t iid, PyThreadState* tstate, microsecond_t delta) } // ---------------------------------------------------------------------------- +using PyThreadStateCallback = std::function; + static void -for_each_thread(InterpreterInfo& interp, std::function callback) +for_each_thread(InterpreterInfo& interp, PyThreadStateCallback callback) { std::unordered_set threads; std::unordered_set seen_threads; @@ -562,8 +805,14 @@ for_each_thread(InterpreterInfo& interp, std::functionsecond); + // Update the tstate_addr for thread info, so we can access + // asyncio_tasks_head field from `_PyThreadStateImpl` struct + // later when we unwind tasks. + auto thread_info = thread_info_map.find(tstate.thread_id)->second.get(); + thread_info->tstate_addr = reinterpret_cast(tstate_addr); + + // Call back with the copied thread state + callback(&tstate, *thread_info); } } } diff --git a/ddtrace/internal/datadog/profiling/stack_v2/src/echion/frame.cc b/ddtrace/internal/datadog/profiling/stack_v2/src/echion/frame.cc index f0809e6f90a..582acc6ce82 100644 --- a/ddtrace/internal/datadog/profiling/stack_v2/src/echion/frame.cc +++ b/ddtrace/internal/datadog/profiling/stack_v2/src/echion/frame.cc @@ -3,6 +3,17 @@ #include #include +#if PY_VERSION_HEX >= 0x030b0000 +#include +#include +#include + +#if PY_VERSION_HEX >= 0x030e0000 +#include +#include +#endif // PY_VERSION_HEX >= 0x030e0000 +#endif // PY_VERSION_HEX >= 0x030b0000 + // ---------------------------------------------------------------------------- #if PY_VERSION_HEX >= 0x030b0000 static inline int @@ -233,7 +244,15 @@ Frame::read(PyObject* frame_addr, PyObject** prev_addr) } #if PY_VERSION_HEX >= 0x030c0000 +#if PY_VERSION_HEX >= 0x030e0000 + // Python 3.14 introduced FRAME_OWNED_BY_INTERPRETER, and frames of this + // type are also ignored by the upstream profiler. + // See + // https://github.com/python/cpython/blob/ebf955df7a89ed0c7968f79faec1de49f61ed7cb/Modules/_remote_debugging_module.c#L2134 + if (frame_addr->owner == FRAME_OWNED_BY_CSTACK || frame_addr->owner == FRAME_OWNED_BY_INTERPRETER) { +#else if (frame_addr->owner == FRAME_OWNED_BY_CSTACK) { +#endif // PY_VERSION_HEX >= 0x030e0000 *prev_addr = frame_addr->previous; // This is a C frame, we just need to ignore it return std::ref(C_FRAME); @@ -246,7 +265,21 @@ Frame::read(PyObject* frame_addr, PyObject** prev_addr) // We cannot use _PyInterpreterFrame_LASTI because _PyCode_CODE reads // from the code object. -#if PY_VERSION_HEX >= 0x030d0000 +#if PY_VERSION_HEX >= 0x030e0000 + // Per Python 3.14 release notes (gh-123923): f_executable uses a tagged pointer. + // Profilers must clear the least significant bit to recover the PyObject* pointer. + PyCodeObject* code_obj = reinterpret_cast(BITS_TO_PTR_MASKED(frame_addr->f_executable)); + _Py_CODEUNIT* code_units = reinterpret_cast<_Py_CODEUNIT*>(code_obj); + int instr_offset = static_cast(frame_addr->instr_ptr - 1 - code_units); + int code_offset = offsetof(PyCodeObject, co_code_adaptive) / sizeof(_Py_CODEUNIT); + const int lasti = instr_offset - code_offset; + auto maybe_frame = Frame::get(code_obj, lasti); + if (!maybe_frame) { + return ErrorKind::FrameError; + } + + auto& frame = maybe_frame->get(); +#elif PY_VERSION_HEX >= 0x030d0000 const int lasti = (static_cast( (frame_addr->instr_ptr - 1 - @@ -268,7 +301,7 @@ Frame::read(PyObject* frame_addr, PyObject** prev_addr) } auto& frame = maybe_frame->get(); -#endif // PY_VERSION_HEX >= 0x030d0000 +#endif // PY_VERSION_HEX >= 0x030e0000 if (&frame != &INVALID_FRAME) { #if PY_VERSION_HEX >= 0x030c0000 frame.is_entry = (frame_addr->owner == FRAME_OWNED_BY_CSTACK); // Shim frame @@ -276,7 +309,6 @@ Frame::read(PyObject* frame_addr, PyObject** prev_addr) frame.is_entry = frame_addr->is_entry; #endif // PY_VERSION_HEX >= 0x030c0000 } - *prev_addr = &frame == &INVALID_FRAME ? NULL : frame_addr->previous; #else // PY_VERSION_HEX < 0x030b0000 diff --git a/ddtrace/internal/settings/profiling.py b/ddtrace/internal/settings/profiling.py index 82653d97642..003df3f28c2 100644 --- a/ddtrace/internal/settings/profiling.py +++ b/ddtrace/internal/settings/profiling.py @@ -1,7 +1,6 @@ import itertools import math import os -import sys import typing as t from ddtrace.ext.git import COMMIT_SHA @@ -65,9 +64,6 @@ def _check_for_stack_v2_available(): def _parse_profiling_enabled(raw: str) -> bool: - if sys.version_info >= (3, 14): - return False - # Try to derive whether we're enabled via DD_INJECTION_ENABLED # - Are we injected (DD_INJECTION_ENABLED set) # - Is profiling enabled ("profiler" in the list) @@ -253,7 +249,7 @@ class ProfilingConfigStack(DDConfig): enabled = DDConfig.v( bool, "enabled", - default=sys.version_info < (3, 14), + default=True, help_type="Boolean", help="Whether to enable the stack profiler", ) @@ -363,14 +359,12 @@ class ProfilingConfigPytorch(DDConfig): # We need to check if ddup is available, and turn off profiling if it is not. if not ddup_is_available: - # We know it is not supported on 3.14, so don't report the error, but still disable - if sys.version_info < (3, 14): - msg = ddup_failure_msg or "libdd not available" - logger.warning("Failed to load ddup module (%s), disabling profiling", msg) - telemetry_writer.add_log( - TELEMETRY_LOG_LEVEL.ERROR, - "Failed to load ddup module (%s), disabling profiling" % ddup_failure_msg, - ) + msg = ddup_failure_msg or "libdd not available" + logger.warning("Failed to load ddup module (%s), disabling profiling", msg) + telemetry_writer.add_log( + TELEMETRY_LOG_LEVEL.ERROR, + "Failed to load ddup module (%s), disabling profiling" % ddup_failure_msg, + ) config.enabled = False # We also need to check if stack_v2 module is available, and turn if off diff --git a/ddtrace/profiling/_asyncio.py b/ddtrace/profiling/_asyncio.py index 2dcbaa08ceb..b1968305879 100644 --- a/ddtrace/profiling/_asyncio.py +++ b/ddtrace/profiling/_asyncio.py @@ -103,20 +103,32 @@ def _(asyncio: ModuleType) -> None: init_stack_v2: bool = config.stack.enabled and stack_v2.is_available - @partial(wrap, sys.modules["asyncio.events"].BaseDefaultEventLoopPolicy.set_event_loop) - def _( - f: typing.Callable[..., typing.Any], args: tuple[typing.Any, ...], kwargs: dict[str, typing.Any] - ) -> typing.Any: - loop: typing.Optional["aio.AbstractEventLoop"] = get_argument_value(args, kwargs, 1, "loop") - try: - if init_stack_v2: - stack_v2.track_asyncio_loop(typing.cast(int, ddtrace_threading.current_thread().ident), loop) - return f(*args, **kwargs) - finally: - assert THREAD_LINK is not None # nosec: assert is used for typing - THREAD_LINK.clear_threads(set(sys._current_frames().keys())) - if loop is not None: - THREAD_LINK.link_object(loop) + # Python 3.14+: BaseDefaultEventLoopPolicy was renamed to _BaseDefaultEventLoopPolicy + # Try both names for compatibility + events_module = sys.modules["asyncio.events"] + if sys.hexversion >= 0x030E0000: + # Python 3.14+: Use _BaseDefaultEventLoopPolicy + policy_class = getattr(events_module, "_BaseDefaultEventLoopPolicy", None) + else: + # Python < 3.14: Use BaseDefaultEventLoopPolicy + policy_class = getattr(events_module, "BaseDefaultEventLoopPolicy", None) + + if policy_class is not None: + + @partial(wrap, policy_class.set_event_loop) + def _( + f: typing.Callable[..., typing.Any], args: tuple[typing.Any, ...], kwargs: dict[str, typing.Any] + ) -> typing.Any: + loop: typing.Optional["aio.AbstractEventLoop"] = get_argument_value(args, kwargs, 1, "loop") + try: + if init_stack_v2: + stack_v2.track_asyncio_loop(typing.cast(int, ddtrace_threading.current_thread().ident), loop) + return f(*args, **kwargs) + finally: + assert THREAD_LINK is not None # nosec: assert is used for typing + THREAD_LINK.clear_threads(set(sys._current_frames().keys())) + if loop is not None: + THREAD_LINK.link_object(loop) if init_stack_v2: diff --git a/releasenotes/notes/profiling-314-support-bc850ac5330c27fc.yaml b/releasenotes/notes/profiling-314-support-bc850ac5330c27fc.yaml new file mode 100644 index 00000000000..a345c497963 --- /dev/null +++ b/releasenotes/notes/profiling-314-support-bc850ac5330c27fc.yaml @@ -0,0 +1,3 @@ +features: + - | + profiling: This adds support for Python 3.14 in the Continuous Profiler. diff --git a/riotfile.py b/riotfile.py index 67b2571b6eb..12a58b9efe3 100644 --- a/riotfile.py +++ b/riotfile.py @@ -3282,7 +3282,7 @@ def select_pys(min_version: str = MIN_PYTHON_VERSION, max_version: str = MAX_PYT Venv( name="profile-uwsgi", command="python -m tests.profiling.run pytest -v --no-cov --capture=no --benchmark-disable {cmdargs} tests/profiling/test_uwsgi.py", # noqa: E501 - pys=select_pys(max_version="3.13"), + pys=select_pys(max_version="3.13"), # uwsgi<2.0.30 is not compatible with Python 3.14 pkgs={ "uwsgi": "<2.0.30", "protobuf": latest, @@ -3365,7 +3365,7 @@ def select_pys(min_version: str = MIN_PYTHON_VERSION, max_version: str = MAX_PYT ), ], ), - # Python >= 3.11 + # Python >= 3.11 (excluding 3.14) Venv( pys=select_pys("3.11", "3.13"), pkgs={"uwsgi": latest}, @@ -3397,11 +3397,43 @@ def select_pys(min_version: str = MIN_PYTHON_VERSION, max_version: str = MAX_PYT ), ], ), + # Python 3.14 - protobuf 4.22.0 is not compatible (TypeError: Metaclasses with custom tp_new) + Venv( + pys="3.14", + pkgs={"uwsgi": latest}, + venvs=[ + Venv( + pkgs={ + # Use latest only - protobuf 4.22.0 fails with Python 3.14 + "protobuf": latest, + }, + ), + # Gevent + Venv( + env={ + "DD_PROFILE_TEST_GEVENT": "1", + }, + pkgs={ + "gunicorn[gevent]": latest, + "gevent": latest, + "protobuf": latest, + }, + ), + # memcpy-based sampler + Venv( + env={ + "ECHION_USE_FAST_COPY_MEMORY": "1", + }, + pkgs={ + "protobuf": latest, + }, + ), + ], + ), Venv( name="profile-memalloc", command="python -m tests.profiling.run pytest -v --no-cov --capture=no --benchmark-disable {cmdargs} tests/profiling/collector/test_memalloc.py", # noqa: E501 - # skipping v3.14 for now due to an unstable `lz4 ` lib issue: https://gitlab.ddbuild.io/DataDog/apm-reliability/dd-trace-py/-/jobs/1163312347 - pys=select_pys(max_version="3.13"), + pys=select_pys(), pkgs={ "protobuf": latest, }, diff --git a/setup.py b/setup.py index 4f1d5288429..415de7ae978 100644 --- a/setup.py +++ b/setup.py @@ -269,8 +269,7 @@ def is_64_bit_python(): rust_features = [] if CURRENT_OS in ("Linux", "Darwin") and is_64_bit_python(): rust_features.append("crashtracker") - if sys.version_info[:2] < (3, 14): - rust_features.append("profiling") + rust_features.append("profiling") class PatchedDistribution(Distribution): @@ -618,7 +617,7 @@ def run(self): self.build_rust() # Build libdd_wrapper before building other extensions that depend on it - if CURRENT_OS in ("Linux", "Darwin") and is_64_bit_python() and sys.version_info < (3, 14): + if CURRENT_OS in ("Linux", "Darwin") and is_64_bit_python(): self.build_libdd_wrapper() super().run() @@ -1164,40 +1163,39 @@ def get_exts_for(name): ) if CURRENT_OS in ("Linux", "Darwin") and is_64_bit_python(): - if sys.version_info < (3, 14): - # Memory profiler now uses CMake to support Abseil dependency - MEMALLOC_DIR = HERE / "ddtrace" / "profiling" / "collector" - ext_modules.append( - CMakeExtension( - "ddtrace.profiling.collector._memalloc", - source_dir=MEMALLOC_DIR, - optional=False, - ) + # Memory profiler now uses CMake to support Abseil dependency + MEMALLOC_DIR = HERE / "ddtrace" / "profiling" / "collector" + ext_modules.append( + CMakeExtension( + "ddtrace.profiling.collector._memalloc", + source_dir=MEMALLOC_DIR, + optional=False, ) + ) - ext_modules.append( - CMakeExtension( - "ddtrace.internal.datadog.profiling.ddup._ddup", - source_dir=DDUP_DIR, - extra_source_dirs=[ - DDUP_DIR / ".." / "cmake", - DDUP_DIR / ".." / "dd_wrapper", - ], - optional=False, - ) + ext_modules.append( + CMakeExtension( + "ddtrace.internal.datadog.profiling.ddup._ddup", + source_dir=DDUP_DIR, + extra_source_dirs=[ + DDUP_DIR / ".." / "cmake", + DDUP_DIR / ".." / "dd_wrapper", + ], + optional=False, ) + ) - ext_modules.append( - CMakeExtension( - "ddtrace.internal.datadog.profiling.stack_v2._stack_v2", - source_dir=STACK_V2_DIR, - extra_source_dirs=[ - STACK_V2_DIR / ".." / "cmake", - STACK_V2_DIR / ".." / "dd_wrapper", - ], - optional=False, - ), - ) + ext_modules.append( + CMakeExtension( + "ddtrace.internal.datadog.profiling.stack_v2._stack_v2", + source_dir=STACK_V2_DIR, + extra_source_dirs=[ + STACK_V2_DIR / ".." / "cmake", + STACK_V2_DIR / ".." / "dd_wrapper", + ], + optional=False, + ), + ) else: diff --git a/tests/internal/test_serverless.py b/tests/internal/test_serverless.py index a8f202223f6..9011f82acc3 100644 --- a/tests/internal/test_serverless.py +++ b/tests/internal/test_serverless.py @@ -1,5 +1,3 @@ -import sys - import pytest from ddtrace.internal.serverless import in_azure_function @@ -134,16 +132,7 @@ def find_spec(self, fullname, *args): ("ddtrace.internal.utils", "http"), ("ddtrace.llmobs", "LLMObs"), ("ddtrace.opentelemetry", "TracerProvider"), - pytest.param( - "ddtrace.profiling", - "profiler", - # when 3.14 is officially supported, this xfail can be removed. - marks=pytest.mark.xfail( - reason="throws AttributeError: module 'asyncio.events' has no attribute 'BaseDefaultEventLoopPolicy'", - condition=sys.version_info >= (3, 14), - strict=True, - ), - ), + ("ddtrace.profiling", "profiler"), ("ddtrace.propagation.http", "HTTPPropagator"), ("ddtrace.trace", "Context, Span, tracer"), ("ddtrace.trace", "Span"), diff --git a/tests/profiling/collector/test_memalloc.py b/tests/profiling/collector/test_memalloc.py index 3006c100149..06c08a27f79 100644 --- a/tests/profiling/collector/test_memalloc.py +++ b/tests/profiling/collector/test_memalloc.py @@ -12,6 +12,7 @@ from tests.profiling.collector import pprof_utils +PY_314_OR_ABOVE = sys.version_info[:2] >= (3, 14) PY_313_OR_ABOVE = sys.version_info[:2] >= (3, 13) PY_311_OR_ABOVE = sys.version_info[:2] >= (3, 11) @@ -847,7 +848,14 @@ def test_memory_collector_thread_lifecycle(tmp_path): def worker(): for i in range(10): - data = [i] * 100 + # On Python 3.14+, increase the allocation size to more reliably + # trigger sampling. The CPython internal could have optimized + # small allocations, and/or allocations that are deallocated too + # quickly. + if PY_314_OR_ABOVE: + data = [i] * 10000000 + else: + data = [i] * 100 del data # Capture reference before context manager exits diff --git a/tests/smoke_test.py b/tests/smoke_test.py index 7f0c02b9b6c..149d85ecd21 100644 --- a/tests/smoke_test.py +++ b/tests/smoke_test.py @@ -73,7 +73,7 @@ def emit(self, record): print("Skipping test, 32-bit DDWAF not ready yet") # Profiling smoke test - if platform.system() in ("Linux", "Darwin") and sys.maxsize > (1 << 32) and sys.version_info[:2] < (3, 14): + if platform.system() in ("Linux", "Darwin") and sys.maxsize > (1 << 32): print("Running profiling smoke test...") profiling_cmd = [sys.executable, "-c", "import ddtrace.profiling.auto"] result = subprocess.run(profiling_cmd, capture_output=True, text=True) diff --git a/tests/telemetry/test_writer.py b/tests/telemetry/test_writer.py index 914ab46a4bc..4c3d1b1339d 100644 --- a/tests/telemetry/test_writer.py +++ b/tests/telemetry/test_writer.py @@ -11,7 +11,6 @@ import pytest from ddtrace import config -from ddtrace.internal.compat import PYTHON_VERSION_INFO from ddtrace.internal.settings._agent import get_agent_hostname from ddtrace.internal.settings._telemetry import config as telemetry_config import ddtrace.internal.telemetry @@ -290,7 +289,7 @@ def test_app_started_event_configuration_override(test_agent_session, run_python {"name": "DD_PROFILING_AGENTLESS", "origin": "default", "value": False}, {"name": "DD_PROFILING_API_TIMEOUT_MS", "origin": "default", "value": 10000}, {"name": "DD_PROFILING_CAPTURE_PCT", "origin": "env_var", "value": 5.0}, - {"name": "DD_PROFILING_ENABLED", "origin": "env_var", "value": PYTHON_VERSION_INFO < (3, 14)}, + {"name": "DD_PROFILING_ENABLED", "origin": "env_var", "value": True}, {"name": "DD_PROFILING_ENABLE_ASSERTS", "origin": "default", "value": False}, {"name": "DD_PROFILING_ENABLE_CODE_PROVENANCE", "origin": "default", "value": True}, {"name": "DD_PROFILING_ENDPOINT_COLLECTION_ENABLED", "origin": "default", "value": True},