From 921956d2f0956cb602002422d08c65a8e35b0f82 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E6=B5=81=E5=B1=BF?= <caishipeng.csp@alibaba-inc.com>
Date: Fri, 9 Jan 2026 10:23:55 +0800
Subject: [PATCH 01/23] [feat] Add Claude Agent SDK instrumentation support

- Initial implementation of Claude Agent SDK instrumentation
- Support for agent query sessions via Hooks mechanism
- Support for tool execution tracing (PreToolUse/PostToolUse hooks)
- Integration with opentelemetry-util-genai ExtendedTelemetryHandler
- Span attributes following OpenTelemetry GenAI Semantic Conventions
- Support for Alibaba Cloud DashScope Anthropic-compatible API

Change-Id: aebd1fd0-3afc-4a37-af74-3800c117aaf0
Change-Id: I187974bfe8b44b9f592ffcb368e11d98c41f1a30
Co-developed-by: Cursor <noreply@cursor.com>
---
 .../CHANGELOG.md                              |  17 +
 .../LICENSE                                   | 201 ++++++
 .../README.rst                                | 159 ++++
 .../pyproject.toml                            |  58 ++
 .../src/opentelemetry/__init__.py             |  15 +
 .../opentelemetry/instrumentation/__init__.py |  15 +
 .../claude_agent_sdk/__init__.py              | 182 +++++
 .../claude_agent_sdk/context.py               |  53 ++
 .../instrumentation/claude_agent_sdk/hooks.py | 260 +++++++
 .../claude_agent_sdk/package.py               |  15 +
 .../instrumentation/claude_agent_sdk/patch.py | 682 ++++++++++++++++++
 .../instrumentation/claude_agent_sdk/utils.py | 220 ++++++
 .../claude_agent_sdk/version.py               |  15 +
 .../tests/conftest.py                         | 115 +++
 .../tests/requirements.latest.txt             |  26 +
 .../tests/requirements.oldest.txt             |  29 +
 .../tests/test_attributes.py                  | 313 ++++++++
 .../tests/test_edge_cases.py                  | 353 +++++++++
 .../tests/test_integration.py                 | 183 +++++
 .../tests/test_mocks.py                       | 244 +++++++
 .../tests/test_unit.py                        | 175 +++++
 tox-loongsuite.ini                            |  13 +
 22 files changed, 3343 insertions(+)
 create mode 100644 instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/CHANGELOG.md
 create mode 100644 instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/LICENSE
 create mode 100644 instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/README.rst
 create mode 100644 instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/pyproject.toml
 create mode 100644 instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/__init__.py
 create mode 100644 instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/instrumentation/__init__.py
 create mode 100644 instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/instrumentation/claude_agent_sdk/__init__.py
 create mode 100644 instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/instrumentation/claude_agent_sdk/context.py
 create mode 100644 instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/instrumentation/claude_agent_sdk/hooks.py
 create mode 100644 instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/instrumentation/claude_agent_sdk/package.py
 create mode 100644 instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/instrumentation/claude_agent_sdk/patch.py
 create mode 100644 instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/instrumentation/claude_agent_sdk/utils.py
 create mode 100644 instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/instrumentation/claude_agent_sdk/version.py
 create mode 100644 instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/conftest.py
 create mode 100644 instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/requirements.latest.txt
 create mode 100644 instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/requirements.oldest.txt
 create mode 100644 instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/test_attributes.py
 create mode 100644 instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/test_edge_cases.py
 create mode 100644 instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/test_integration.py
 create mode 100644 instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/test_mocks.py
 create mode 100644 instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/test_unit.py

diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/CHANGELOG.md b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/CHANGELOG.md
new file mode 100644
index 000000000..26f1bc598
--- /dev/null
+++ b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/CHANGELOG.md
@@ -0,0 +1,17 @@
+# Changelog
+
+All notable changes to this project will be documented in this file.
+
+The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
+
+## [Unreleased]
+
+### Added
+
+- Initial implementation of Claude Agent SDK instrumentation
+- Support for agent query sessions via Hooks mechanism
+- Support for tool execution tracing (PreToolUse/PostToolUse hooks)
+- Integration with `opentelemetry-util-genai` ExtendedTelemetryHandler
+- Span attributes following OpenTelemetry GenAI Semantic Conventions
+- Support for Alibaba Cloud DashScope Anthropic-compatible API
+
diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/LICENSE b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/LICENSE
new file mode 100644
index 000000000..261eeb9e9
--- /dev/null
+++ b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/LICENSE
@@ -0,0 +1,201 @@
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/README.rst b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/README.rst
new file mode 100644
index 000000000..615c2fc43
--- /dev/null
+++ b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/README.rst
@@ -0,0 +1,159 @@
+LoongSuite Instrumentation for Claude Agent SDK
+================================================
+
+This library provides automatic instrumentation for the `Claude Agent SDK
+<https://github.com/anthropics/claude-agent-sdk-python>`_, adding OpenTelemetry
+tracing and metrics for agent conversations, LLM calls, and tool executions.
+
+.. note::
+   This package is currently in development and must be installed from source.
+   PyPI release is planned for future versions.
+
+Installation
+------------
+
+::
+
+    pip install opentelemetry-distro opentelemetry-exporter-otlp
+    opentelemetry-bootstrap -a install
+
+    pip install claude-agent-sdk
+
+    # Install this instrumentation
+    pip install ./instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk
+
+    # Note: This instrumentation uses ExtendedTelemetryHandler from opentelemetry-util-genai
+    pip install ./util/opentelemetry-util-genai
+
+Usage
+-----
+
+Auto-instrumentation
+~~~~~~~~~~~~~~~~~~~~
+
+Use the ``opentelemetry-instrument`` wrapper:
+
+::
+
+    opentelemetry-instrument \
+        --traces_exporter console \
+        --metrics_exporter console \
+        python your_claude_agent_app.py
+
+Manual Instrumentation
+~~~~~~~~~~~~~~~~~~~~~~
+
+::
+
+    from opentelemetry.instrumentation.claude_agent_sdk import ClaudeAgentSDKInstrumentor
+    from claude_agent_sdk import query
+    from claude_agent_sdk.types import ClaudeAgentOptions
+
+    ClaudeAgentSDKInstrumentor().instrument()
+
+    options = ClaudeAgentOptions(model="claude-3-5-sonnet-20241022", max_turns=5)
+    
+    async def run_agent():
+        async for message in query(prompt="Hello!", options=options):
+            print(message)
+
+    ClaudeAgentSDKInstrumentor().uninstrument()
+
+Configuration
+-------------
+
+Export to OTLP Backend
+~~~~~~~~~~~~~~~~~~~~~~
+
+::
+
+    export OTEL_SERVICE_NAME=my-claude-agent-app
+    export OTEL_EXPORTER_OTLP_PROTOCOL=http/protobuf
+    export OTEL_EXPORTER_OTLP_TRACES_ENDPOINT=<trace_endpoint>
+    export OTEL_EXPORTER_OTLP_METRICS_ENDPOINT=<metrics_endpoint>
+
+    opentelemetry-instrument python your_app.py
+
+Content Capture
+~~~~~~~~~~~~~~~
+
+Control message content capture using environment variables:
+
+::
+
+    # Enable experimental GenAI semantic conventions
+    export OTEL_SEMCONV_STABILITY_OPT_IN=gen_ai_latest_experimental
+
+    # Capture content in spans only
+    export OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT=SPAN_ONLY
+
+    # Capture content in events only
+    export OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT=EVENT_ONLY
+
+    # Capture in both spans and events
+    export OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT=SPAN_AND_EVENT
+
+    # Disable content capture (default)
+    export OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT=NO_CONTENT
+
+Using with Alibaba Cloud DashScope
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+This instrumentation works with Alibaba Cloud's DashScope service via the
+Anthropic-compatible API endpoint:
+
+::
+
+    import os
+    
+    # Set environment variables for DashScope
+    os.environ["ANTHROPIC_BASE_URL"] = "https://dashscope.aliyuncs.com/apps/anthropic"
+    os.environ["ANTHROPIC_API_KEY"] = "your-dashscope-api-key"
+
+Supported Components
+--------------------
+
+- **Agent Sessions**: ``query`` function for conversational agent interactions
+- **Tool Executions**: Automatic tracing via PreToolUse/PostToolUse hooks
+- **Token Tracking**: Via MessageComplete hook
+- **Context Compaction**: Via PreCompact hook
+
+Visualization
+-------------
+
+Export telemetry data to:
+
+- `Alibaba Cloud Managed Service for OpenTelemetry <https://www.aliyun.com/product/xtrace>`_
+- Any OpenTelemetry-compatible backend (Jaeger, Zipkin, etc.)
+
+Span Hierarchy
+--------------
+
+::
+
+    invoke_agent (parent span)
+      ├── User prompt event
+      ├── execute_tool (child span)
+      │   ├── gen_ai.tool.input.* attributes
+      │   └── gen_ai.tool.response.* attributes
+      ├── execute_tool (child span)
+      │   └── ...
+      └── Agent completed event
+
+Examples
+--------
+
+See the `main README <https://github.com/alibaba/loongsuite-python-agent/blob/main/README.md>`_ for complete usage examples.
+
+License
+-------
+
+Apache License 2.0
+
+References
+----------
+
+- `OpenTelemetry GenAI Semantic Conventions <https://opentelemetry.io/docs/specs/semconv/gen-ai/>`_
+- `Claude Agent SDK <https://github.com/anthropics/claude-agent-sdk-python>`_
+- `Alibaba Cloud DashScope Anthropic API <https://help.aliyun.com/zh/model-studio/anthropic-api-messages>`_
+
diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/pyproject.toml b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/pyproject.toml
new file mode 100644
index 000000000..0271630ae
--- /dev/null
+++ b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/pyproject.toml
@@ -0,0 +1,58 @@
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[project]
+name = "loongsuite-instrumentation-claude-agent-sdk"
+dynamic = ["version"]
+description = "LoongSuite Claude Agent SDK instrumentation"
+readme = "README.rst"
+license = "Apache-2.0"
+requires-python = ">=3.9"
+authors = [
+  { name = "LoongSuite Python Agent Authors", email = "qp467389@alibaba-inc.com" },
+]
+classifiers = [
+  "Development Status :: 4 - Beta",
+  "Intended Audience :: Developers",
+  "License :: OSI Approved :: Apache Software License",
+  "Programming Language :: Python",
+  "Programming Language :: Python :: 3",
+  "Programming Language :: Python :: 3.9",
+  "Programming Language :: Python :: 3.10",
+  "Programming Language :: Python :: 3.11",
+  "Programming Language :: Python :: 3.12",
+  "Programming Language :: Python :: 3.13",
+]
+dependencies = [
+  "opentelemetry-api ~= 1.37",
+  "opentelemetry-instrumentation ~= 0.58b0",
+  "opentelemetry-semantic-conventions ~= 0.58b0",
+  # Note: opentelemetry-util-genai should be installed from local source
+  # for extended features (ExtendedTelemetryHandler)
+  "opentelemetry-util-genai >= 0.2b0",
+]
+
+[project.optional-dependencies]
+instruments = [
+  "claude-agent-sdk >= 0.1.0",
+]
+
+[project.entry-points.opentelemetry_instrumentor]
+claude_agent_sdk = "opentelemetry.instrumentation.claude_agent_sdk:ClaudeAgentSDKInstrumentor"
+
+[project.urls]
+Homepage = "https://github.com/alibaba/loongsuite-python-agent"
+
+[tool.hatch.version]
+path = "src/opentelemetry/instrumentation/claude_agent_sdk/version.py"
+
+[tool.hatch.build.targets.sdist]
+include = [
+  "/src",
+  "/tests",
+]
+
+[tool.hatch.build.targets.wheel]
+packages = ["src/opentelemetry"]
+
diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/__init__.py b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/__init__.py
new file mode 100644
index 000000000..175296eca
--- /dev/null
+++ b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/__init__.py
@@ -0,0 +1,15 @@
+# Copyright The OpenTelemetry Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+__path__ = __import__("pkgutil").extend_path(__path__, __name__)
diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/instrumentation/__init__.py b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/instrumentation/__init__.py
new file mode 100644
index 000000000..175296eca
--- /dev/null
+++ b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/instrumentation/__init__.py
@@ -0,0 +1,15 @@
+# Copyright The OpenTelemetry Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+__path__ = __import__("pkgutil").extend_path(__path__, __name__)
diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/instrumentation/claude_agent_sdk/__init__.py b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/instrumentation/claude_agent_sdk/__init__.py
new file mode 100644
index 000000000..e6b6be224
--- /dev/null
+++ b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/instrumentation/claude_agent_sdk/__init__.py
@@ -0,0 +1,182 @@
+# Copyright The OpenTelemetry Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+OpenTelemetry Claude Agent SDK Instrumentation
+==============================================
+
+This package provides automatic instrumentation for the Claude Agent SDK,
+capturing telemetry data for agent sessions and tool executions.
+
+Usage
+-----
+
+Basic instrumentation::
+
+    from opentelemetry.instrumentation.claude_agent_sdk import ClaudeAgentSDKInstrumentor
+
+    # Apply instrumentation
+    ClaudeAgentSDKInstrumentor().instrument()
+
+    # Your Claude Agent SDK code works as normal
+    from claude_agent_sdk import ClaudeSDKClient
+
+    async with ClaudeSDKClient() as client:
+        await client.query(prompt="Hello!")
+        async for message in client.receive_response():
+            print(message)
+
+The instrumentation automatically captures:
+
+- Agent session spans (invoke_agent)
+- Tool execution spans (execute_tool)
+- Token usage (input/output tokens)
+
+"""
+
+import logging
+from typing import Any, Collection, Optional
+
+from wrapt import wrap_function_wrapper
+
+from opentelemetry.instrumentation.claude_agent_sdk.package import _instruments
+from opentelemetry.instrumentation.claude_agent_sdk.patch import (
+    wrap_claude_client_init,
+    wrap_claude_client_query,
+    wrap_claude_client_receive_response,
+    wrap_query,
+)
+from opentelemetry.instrumentation.claude_agent_sdk.version import __version__
+from opentelemetry.instrumentation.instrumentor import BaseInstrumentor
+from opentelemetry.instrumentation.utils import unwrap
+from opentelemetry.util.genai.extended_handler import ExtendedTelemetryHandler
+
+logger = logging.getLogger(__name__)
+
+
+class ClaudeAgentSDKInstrumentor(BaseInstrumentor):
+    """
+    Instrumentor for Claude Agent SDK.
+    """
+
+    def __init__(self):
+        super().__init__()
+        self._handler: Optional[ExtendedTelemetryHandler] = None
+
+    def instrumentation_dependencies(self) -> Collection[str]:
+        """Return the dependencies required for this instrumentation."""
+        return _instruments
+
+    def _instrument(self, **kwargs: Any) -> None:
+        """
+        Apply instrumentation to Claude Agent SDK.
+
+        Kwargs:
+            tracer_provider: Optional TracerProvider to use
+            meter_provider: Optional MeterProvider to use
+            logger_provider: Optional LoggerProvider to use
+        """
+        tracer_provider = kwargs.get("tracer_provider")
+        meter_provider = kwargs.get("meter_provider")
+        logger_provider = kwargs.get("logger_provider")
+
+        # Create ExtendedTelemetryHandler
+        self._handler = ExtendedTelemetryHandler(
+            tracer_provider=tracer_provider,
+            meter_provider=meter_provider,
+            logger_provider=logger_provider,
+        )
+
+        # Wrap ClaudeSDKClient.__init__
+        try:
+            wrap_function_wrapper(
+                module="claude_agent_sdk",
+                name="ClaudeSDKClient.__init__",
+                wrapper=lambda wrapped,
+                instance,
+                args,
+                kwargs: wrap_claude_client_init(
+                    wrapped, instance, args, kwargs, handler=self._handler
+                ),
+            )
+        except Exception as e:
+            logger.warning(
+                f"Failed to instrument ClaudeSDKClient.__init__: {e}"
+            )
+
+        # Wrap ClaudeSDKClient.query
+        try:
+            wrap_function_wrapper(
+                module="claude_agent_sdk",
+                name="ClaudeSDKClient.query",
+                wrapper=lambda wrapped,
+                instance,
+                args,
+                kwargs: wrap_claude_client_query(
+                    wrapped, instance, args, kwargs, handler=self._handler
+                ),
+            )
+        except Exception as e:
+            logger.warning(f"Failed to instrument ClaudeSDKClient.query: {e}")
+
+        # Wrap ClaudeSDKClient.receive_response
+        try:
+            wrap_function_wrapper(
+                module="claude_agent_sdk",
+                name="ClaudeSDKClient.receive_response",
+                wrapper=lambda wrapped,
+                instance,
+                args,
+                kwargs: wrap_claude_client_receive_response(
+                    wrapped, instance, args, kwargs, handler=self._handler
+                ),
+            )
+        except Exception as e:
+            logger.warning(
+                f"Failed to instrument ClaudeSDKClient.receive_response: {e}"
+            )
+
+        # Wrap standalone query() function
+        try:
+            wrap_function_wrapper(
+                module="claude_agent_sdk",
+                name="query",
+                wrapper=lambda wrapped, instance, args, kwargs: wrap_query(
+                    wrapped, instance, args, kwargs, handler=self._handler
+                ),
+            )
+        except Exception as e:
+            logger.warning(f"Failed to instrument claude_agent_sdk.query: {e}")
+
+    def _uninstrument(self, **kwargs: Any) -> None:
+        """Remove instrumentation from Claude Agent SDK."""
+        try:
+            import claude_agent_sdk  # noqa: PLC0415
+
+            # Unwrap all instrumented methods
+            unwrap(claude_agent_sdk.ClaudeSDKClient, "__init__")
+            unwrap(claude_agent_sdk.ClaudeSDKClient, "query")
+            unwrap(claude_agent_sdk.ClaudeSDKClient, "receive_response")
+            unwrap(claude_agent_sdk, "query")
+
+        except Exception as e:
+            logger.warning(f"Failed to uninstrument Claude Agent SDK: {e}")
+
+        self._handler = None
+
+
+__all__ = [
+    "__version__",
+    "ClaudeAgentSDKInstrumentor",
+]
diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/instrumentation/claude_agent_sdk/context.py b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/instrumentation/claude_agent_sdk/context.py
new file mode 100644
index 000000000..4b7d160fe
--- /dev/null
+++ b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/instrumentation/claude_agent_sdk/context.py
@@ -0,0 +1,53 @@
+# Copyright The OpenTelemetry Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+Thread-local storage utilities for Claude Agent SDK tracing.
+
+This module provides thread-local storage for the parent invocation context,
+which is used by hooks to maintain trace context when async context
+propagation is broken (Claude's async event loop breaks OpenTelemetry context).
+"""
+
+import threading
+from typing import Any, Optional
+
+# Thread-local store for passing the parent invocation into hooks.
+# Claude's async event loop by default breaks OpenTelemetry context propagation.
+# The parent invocation is threaded via thread-local as a fallback.
+_thread_local = threading.local()
+
+
+def set_parent_invocation(invocation: Any) -> None:
+    """Set the parent agent invocation in thread-local storage.
+
+    Args:
+        invocation: InvokeAgentInvocation or ExecuteToolInvocation instance
+    """
+    _thread_local.parent_invocation = invocation
+
+
+def clear_parent_invocation() -> None:
+    """Clear the parent invocation from thread-local storage."""
+    if hasattr(_thread_local, "parent_invocation"):
+        delattr(_thread_local, "parent_invocation")
+
+
+def get_parent_invocation() -> Optional[Any]:
+    """Get the parent invocation from thread-local storage.
+
+    Returns:
+        Parent invocation or None if not set
+    """
+    return getattr(_thread_local, "parent_invocation", None)
diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/instrumentation/claude_agent_sdk/hooks.py b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/instrumentation/claude_agent_sdk/hooks.py
new file mode 100644
index 000000000..d32aeb3aa
--- /dev/null
+++ b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/instrumentation/claude_agent_sdk/hooks.py
@@ -0,0 +1,260 @@
+# Copyright The OpenTelemetry Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import logging
+from typing import TYPE_CHECKING, Any, Dict, Optional, Tuple
+
+from opentelemetry import context as otel_context
+from opentelemetry.instrumentation.claude_agent_sdk.context import (
+    get_parent_invocation,
+)
+from opentelemetry.trace import set_span_in_context
+from opentelemetry.util.genai.extended_handler import (
+    get_extended_telemetry_handler,
+)
+from opentelemetry.util.genai.extended_types import ExecuteToolInvocation
+from opentelemetry.util.genai.types import Error
+
+if TYPE_CHECKING:
+    from claude_agent_sdk import (
+        HookContext,
+        HookInput,
+        HookJSONOutput,
+    )
+
+logger = logging.getLogger(__name__)
+
+# Storage for correlating PreToolUse and PostToolUse events
+# Key: tool_use_id, Value: (tool_invocation, handler)
+_active_tool_runs: Dict[str, Tuple[ExecuteToolInvocation, Any]] = {}
+
+# Storage for tool or subagent runs managed by client
+# Key: tool_use_id, Value: tool_invocation
+_client_managed_runs: Dict[str, ExecuteToolInvocation] = {}
+
+
+async def pre_tool_use_hook(
+    input_data: "HookInput",
+    tool_use_id: Optional[str],
+    context: "HookContext",
+) -> "HookJSONOutput":
+    """Trace tool execution before it starts.
+
+    This hook is called by Claude Agent SDK before executing a tool.
+    It creates an execute_tool span as a child of the current agent span.
+
+    Args:
+        input_data: Contains `tool_name`, `tool_input`, `session_id`
+        tool_use_id: Unique identifier for this tool invocation
+        context: Hook context (currently contains only signal)
+
+    Returns:
+        Hook output (empty dict allows execution to proceed)
+    """
+    if not tool_use_id:
+        return {}
+
+    # Skip if this tool run is already managed by the client
+    if tool_use_id in _client_managed_runs:
+        return {}
+
+    tool_name: str = str(input_data.get("tool_name", "unknown_tool"))
+    tool_input = input_data.get("tool_input", {})
+    session_id = input_data.get("session_id", "")
+
+    try:
+        handler = get_extended_telemetry_handler()
+        parent_invocation = get_parent_invocation()
+
+        if not parent_invocation:
+            return {}
+
+        # Create tool invocation following ExecuteToolInvocation semantic conventions
+        # Map to standard fields strictly, avoiding custom attributes
+        tool_invocation = ExecuteToolInvocation(
+            tool_name=tool_name,
+            tool_call_id=tool_use_id,
+            tool_call_arguments=tool_input,  # Standard field: tool call arguments
+            tool_description=tool_name,  # Use tool_name directly
+            attributes={
+                # Only include Claude Agent SDK-specific attributes that cannot map to standard fields
+                "tool.session_id": session_id,
+            }
+            if session_id
+            else {},
+        )
+
+        # Explicitly create tool span as child of parent invocation span
+        # This avoids relying on broken async context propagation
+        if parent_invocation and parent_invocation.span:
+            # Create child span in parent's context
+            ctx = set_span_in_context(parent_invocation.span)
+            token = otel_context.attach(ctx)
+
+            try:
+                handler.start_execute_tool(tool_invocation)
+            finally:
+                # Detach after starting span
+                try:
+                    otel_context.detach(token)
+                except Exception:
+                    pass  # Ignore detach errors
+        else:
+            # Fallback to auto-parenting (may not work due to broken context)
+            handler.start_execute_tool(tool_invocation)
+
+        _active_tool_runs[tool_use_id] = (tool_invocation, handler)
+
+    except Exception as e:
+        logger.warning(
+            f"Error in PreToolUse hook for {tool_name}: {e}", exc_info=True
+        )
+
+    return {}
+
+
+async def post_tool_use_hook(
+    input_data: "HookInput",
+    tool_use_id: Optional[str],
+    context: "HookContext",
+) -> "HookJSONOutput":
+    """Trace tool execution after it completes.
+
+    This hook is called by Claude Agent SDK after tool execution completes.
+    It ends the corresponding execute_tool span and records the result.
+
+    Args:
+        input_data: Contains `tool_name`, `tool_input`, `tool_response`, `session_id`, etc.
+        tool_use_id: Unique identifier for this tool invocation
+        context: Hook context (currently contains only signal)
+
+    Returns:
+        Hook output (empty dict by default)
+    """
+    if not tool_use_id:
+        return {}
+
+    tool_name: str = str(input_data.get("tool_name", "unknown_tool"))
+    tool_response = input_data.get("tool_response")
+
+    # Check if this is a client-managed run
+    client_invocation = _client_managed_runs.pop(tool_use_id, None)
+    if client_invocation:
+        # This run is managed by the client (subagent session or its tools)
+        try:
+            handler = get_extended_telemetry_handler()
+
+            # Set response (will be auto-formatted to gen_ai.tool.call.result by telemetry handler)
+            client_invocation.tool_call_result = tool_response
+
+            # Check if error
+            is_error = False
+            if isinstance(tool_response, dict):
+                is_error = tool_response.get("is_error", False)
+
+            if is_error:
+                error_msg = (
+                    str(tool_response)
+                    if tool_response
+                    else "Tool execution error"
+                )
+                handler.fail_execute_tool(
+                    client_invocation,
+                    Error(message=error_msg, type=RuntimeError),
+                )
+            else:
+                handler.stop_execute_tool(client_invocation)
+
+        except Exception as e:
+            logger.warning(
+                f"Failed to complete client-managed run: {e}", exc_info=True
+            )
+        return {}
+
+    try:
+        run_info = _active_tool_runs.pop(tool_use_id, None)
+        if not run_info:
+            return {}
+
+        tool_invocation, handler = run_info
+
+        # Set response (will be auto-formatted to gen_ai.tool.call.result by telemetry handler)
+        tool_invocation.tool_call_result = tool_response
+
+        # Check if error
+        is_error = False
+        if isinstance(tool_response, dict):
+            is_error = tool_response.get("is_error", False)
+
+        if is_error:
+            error_msg = (
+                str(tool_response) if tool_response else "Tool execution error"
+            )
+            handler.fail_execute_tool(
+                tool_invocation, Error(message=error_msg, type=RuntimeError)
+            )
+        else:
+            handler.stop_execute_tool(tool_invocation)
+
+    except Exception as e:
+        logger.warning(
+            f"Error in PostToolUse hook for {tool_name}: {e}", exc_info=True
+        )
+
+    return {}
+
+
+def clear_active_tool_runs() -> None:
+    """Clear all active tool runs.
+
+    This should be called when a conversation ends to avoid memory leaks
+    and to clean up any orphaned tool runs.
+    """
+    global _active_tool_runs, _client_managed_runs
+
+    try:
+        handler = get_extended_telemetry_handler()
+    except Exception:
+        _active_tool_runs.clear()
+        _client_managed_runs.clear()
+        return
+
+    # End any orphaned client-managed runs
+    for tool_use_id, tool_invocation in list(_client_managed_runs.items()):
+        try:
+            handler.fail_execute_tool(
+                tool_invocation,
+                Error(
+                    message="Client-managed run not completed (conversation ended)",
+                    type=RuntimeError,
+                ),
+            )
+        except Exception:
+            pass
+
+    # End any orphaned tool runs
+    for tool_use_id, (tool_invocation, _) in list(_active_tool_runs.items()):
+        try:
+            handler.fail_execute_tool(
+                tool_invocation,
+                Error(
+                    message="Tool run not completed (conversation ended)",
+                    type=RuntimeError,
+                ),
+            )
+        except Exception:
+            pass
+
+    _active_tool_runs.clear()
+    _client_managed_runs.clear()
diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/instrumentation/claude_agent_sdk/package.py b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/instrumentation/claude_agent_sdk/package.py
new file mode 100644
index 000000000..b97835876
--- /dev/null
+++ b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/instrumentation/claude_agent_sdk/package.py
@@ -0,0 +1,15 @@
+# Copyright The OpenTelemetry Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+_instruments = ("claude-agent-sdk >= 0.1.0",)
diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/instrumentation/claude_agent_sdk/patch.py b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/instrumentation/claude_agent_sdk/patch.py
new file mode 100644
index 000000000..5221a7073
--- /dev/null
+++ b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/instrumentation/claude_agent_sdk/patch.py
@@ -0,0 +1,682 @@
+# Copyright The OpenTelemetry Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Patch functions for Claude Agent SDK instrumentation."""
+
+import logging
+import time
+from typing import Any, Dict, List, Optional
+
+from claude_agent_sdk import HookMatcher
+from claude_agent_sdk.types import ClaudeAgentOptions
+
+from opentelemetry.instrumentation.claude_agent_sdk.context import (
+    clear_parent_invocation,
+    set_parent_invocation,
+)
+from opentelemetry.instrumentation.claude_agent_sdk.hooks import (
+    _client_managed_runs,
+    clear_active_tool_runs,
+    post_tool_use_hook,
+    pre_tool_use_hook,
+)
+from opentelemetry.instrumentation.claude_agent_sdk.utils import (
+    extract_usage_from_result_message,
+    infer_provider_from_base_url,
+)
+from opentelemetry.util.genai.extended_handler import ExtendedTelemetryHandler
+from opentelemetry.util.genai.extended_types import (
+    ExecuteToolInvocation,
+    InvokeAgentInvocation,
+)
+from opentelemetry.util.genai.types import (
+    Error,
+    InputMessage,
+    LLMInvocation,
+    OutputMessage,
+    Text,
+    ToolCall,
+)
+
+logger = logging.getLogger(__name__)
+
+
+def _extract_message_parts(msg: Any) -> List[Any]:
+    """Extract parts (text + tool calls) from an AssistantMessage."""
+    parts = []
+    if not hasattr(msg, "content"):
+        return parts
+
+    for block in msg.content:
+        block_type = type(block).__name__
+        if block_type == "TextBlock":
+            parts.append(Text(content=getattr(block, "text", "")))
+        elif block_type == "ToolUseBlock":
+            tool_call = ToolCall(
+                id=getattr(block, "id", ""),
+                name=getattr(block, "name", ""),
+                arguments=getattr(block, "input", {}),
+            )
+            parts.append(tool_call)
+
+    return parts
+
+
+def _create_tool_spans_from_message(
+    msg: Any,
+    handler: ExtendedTelemetryHandler,
+    exclude_tool_names: Optional[List[str]] = None,
+) -> None:
+    """Create tool execution spans from ToolUseBlocks in an AssistantMessage."""
+    if not hasattr(msg, "content"):
+        return
+
+    exclude_tool_names = exclude_tool_names or []
+
+    for block in msg.content:
+        if type(block).__name__ != "ToolUseBlock":
+            continue
+
+        tool_use_id = getattr(block, "id", None)
+        tool_name = getattr(block, "name", "unknown_tool")
+        tool_input = getattr(block, "input", {})
+
+        if not tool_use_id or tool_name in exclude_tool_names:
+            continue
+
+        try:
+            tool_invocation = ExecuteToolInvocation(
+                tool_name=tool_name,
+                tool_call_id=tool_use_id,
+                tool_call_arguments=tool_input,
+                tool_description=tool_name,
+            )
+            handler.start_execute_tool(tool_invocation)
+            _client_managed_runs[tool_use_id] = tool_invocation
+        except Exception as e:
+            logger.warning(f"Failed to create tool span for {tool_name}: {e}")
+
+
+def _close_tool_spans_from_message(
+    msg: Any,
+    handler: ExtendedTelemetryHandler,
+) -> List[str]:
+    """Close tool execution spans from ToolResultBlocks in a UserMessage."""
+    user_text_parts = []
+
+    if not hasattr(msg, "content"):
+        return user_text_parts
+
+    for block in msg.content:
+        block_type = type(block).__name__
+
+        if block_type == "ToolResultBlock":
+            tool_use_id = getattr(block, "tool_use_id", None)
+            if tool_use_id and tool_use_id in _client_managed_runs:
+                tool_invocation = _client_managed_runs.pop(tool_use_id)
+
+                # Set tool response
+                tool_content = getattr(block, "content", None)
+                is_error = getattr(block, "is_error", False)
+
+                tool_invocation.tool_call_result = tool_content
+
+                # Complete span
+                if is_error:
+                    error_msg = (
+                        str(tool_content)
+                        if tool_content
+                        else "Tool execution error"
+                    )
+                    handler.fail_execute_tool(
+                        tool_invocation,
+                        Error(message=error_msg, type=RuntimeError),
+                    )
+                else:
+                    handler.stop_execute_tool(tool_invocation)
+
+        elif block_type == "TextBlock":
+            user_text_parts.append(getattr(block, "text", ""))
+
+    return user_text_parts
+
+
+def _update_token_usage(
+    agent_invocation: InvokeAgentInvocation,
+    turn_tracker: "AssistantTurnTracker",
+    msg: Any,
+) -> None:
+    """Update token usage from a ResultMessage."""
+    usage_meta = extract_usage_from_result_message(msg)
+    if not usage_meta:
+        return
+
+    # Update agent invocation token usage
+    if "input_tokens" in usage_meta:
+        agent_invocation.input_tokens = usage_meta["input_tokens"]
+    if "output_tokens" in usage_meta:
+        agent_invocation.output_tokens = usage_meta["output_tokens"]
+
+    # Update current LLM turn token usage
+    turn_tracker.update_usage(
+        usage_meta.get("input_tokens"), usage_meta.get("output_tokens")
+    )
+
+
+def _process_assistant_message(
+    msg: Any,
+    model: str,
+    prompt: str,
+    agent_invocation: InvokeAgentInvocation,
+    turn_tracker: "AssistantTurnTracker",
+    handler: ExtendedTelemetryHandler,
+    collected_messages: List[Dict[str, Any]],
+    process_subagents: bool = False,
+    subagent_sessions: Optional[Dict[str, InvokeAgentInvocation]] = None,
+) -> None:
+    """Process AssistantMessage: create LLM turn, extract parts, create tool spans."""
+    # Start a new LLM turn (closes previous one if exists)
+    turn_tracker.start_llm_turn(
+        msg,
+        model,
+        prompt,
+        collected_messages,
+        provider=infer_provider_from_base_url(),
+    )
+
+    # Extract message parts
+    parts = _extract_message_parts(msg)
+
+    # Add output to LLM turn and Agent invocation
+    if parts:
+        turn_tracker.add_assistant_output(parts)
+        output_msg = OutputMessage(
+            role="assistant", parts=parts, finish_reason="stop"
+        )
+        agent_invocation.output_messages.append(output_msg)
+
+        # Collect assistant message for next turn's input
+        text_parts = [p.content for p in parts if isinstance(p, Text)]
+        if text_parts:
+            collected_messages.append(
+                {"role": "assistant", "content": " ".join(text_parts)}
+            )
+
+    # Close LLM span before creating tool spans (ensures tool spans are siblings of LLM spans)
+    turn_tracker.close_llm_turn()
+
+    # Process Task subagents if enabled
+    if process_subagents and subagent_sessions is not None:
+        _handle_task_subagents(
+            msg, agent_invocation, subagent_sessions, handler
+        )
+
+    # Create tool spans (exclude Task if processing subagents)
+    exclude_tools = ["Task"] if process_subagents else []
+    _create_tool_spans_from_message(
+        msg, handler, exclude_tool_names=exclude_tools
+    )
+
+
+def _process_user_message(
+    msg: Any,
+    turn_tracker: "AssistantTurnTracker",
+    handler: ExtendedTelemetryHandler,
+    collected_messages: List[Dict[str, Any]],
+) -> None:
+    """Process UserMessage: close tool spans, collect message content, mark next LLM start."""
+    user_text_parts = _close_tool_spans_from_message(msg, handler)
+
+    if user_text_parts:
+        user_content = " ".join(user_text_parts)
+        collected_messages.append({"role": "user", "content": user_content})
+        turn_tracker.mark_next_llm_start()
+
+
+def _process_result_message(
+    msg: Any,
+    agent_invocation: InvokeAgentInvocation,
+    turn_tracker: "AssistantTurnTracker",
+) -> None:
+    """Process ResultMessage: update session_id and token usage."""
+    if hasattr(msg, "session_id") and msg.session_id:
+        agent_invocation.conversation_id = msg.session_id
+        if agent_invocation.span:
+            agent_invocation.span.set_attribute(
+                "gen_ai.conversation.id", msg.session_id
+            )
+
+    _update_token_usage(agent_invocation, turn_tracker, msg)
+
+
+class AssistantTurnTracker:
+    """Track LLM invocations (assistant turns) in a Claude Agent conversation."""
+
+    def __init__(
+        self,
+        handler: ExtendedTelemetryHandler,
+        query_start_time: Optional[float] = None,
+    ):
+        self.handler = handler
+        self.current_llm_invocation: Optional[LLMInvocation] = None
+        self.last_closed_llm_invocation: Optional[LLMInvocation] = None
+        self.next_llm_start_time: Optional[float] = query_start_time
+
+    def start_llm_turn(
+        self,
+        msg: Any,
+        model: str,
+        prompt: str,
+        collected_messages: List[Dict[str, Any]],
+        provider: str = "anthropic",
+    ) -> Optional[LLMInvocation]:
+        """Start a new LLM invocation span with pre-recorded start time."""
+        start_time = self.next_llm_start_time or time.time()
+
+        if self.current_llm_invocation:
+            self.handler.stop_llm(self.current_llm_invocation)
+            self.last_closed_llm_invocation = self.current_llm_invocation
+            self.current_llm_invocation = None
+
+        self.next_llm_start_time = None
+
+        # Build input_messages from prompt + collected messages
+        input_messages = []
+
+        if prompt:
+            input_messages.append(
+                InputMessage(role="user", parts=[Text(content=prompt)])
+            )
+
+        for hist_msg in collected_messages:
+            role = hist_msg.get("role", "user")
+            content = hist_msg.get("content", "")
+            if isinstance(content, str) and content:
+                input_messages.append(
+                    InputMessage(role=role, parts=[Text(content=content)])
+                )
+
+        llm_invocation = LLMInvocation(
+            provider=provider,
+            request_model=model,
+            input_messages=input_messages,
+        )
+
+        self.handler.start_llm(llm_invocation)
+
+        # Override span start time
+        if llm_invocation.span and start_time:
+            start_time_ns = int(start_time * 1_000_000_000)
+            try:
+                if hasattr(llm_invocation.span, "_start_time"):
+                    llm_invocation.span._start_time = start_time_ns  # type: ignore
+            except Exception as e:
+                logger.warning(f"Failed to set span start time: {e}")
+
+        self.current_llm_invocation = llm_invocation
+        return llm_invocation
+
+    def add_assistant_output(self, parts: List[Any]) -> None:
+        """Add output message parts to current LLM invocation."""
+        if not self.current_llm_invocation or not parts:
+            return
+
+        output_msg = OutputMessage(
+            role="assistant", parts=parts, finish_reason="stop"
+        )
+        self.current_llm_invocation.output_messages.append(output_msg)
+
+    def add_user_message(self, content: str) -> None:
+        """Mark next LLM start time."""
+        self.mark_next_llm_start()
+
+    def mark_next_llm_start(self) -> None:
+        """Mark the start time for the next LLM invocation."""
+        self.next_llm_start_time = time.time()
+
+    def update_usage(
+        self, input_tokens: Optional[int], output_tokens: Optional[int]
+    ) -> None:
+        """Update token usage for current or last closed LLM invocation."""
+        target_invocation = (
+            self.current_llm_invocation or self.last_closed_llm_invocation
+        )
+        if not target_invocation:
+            return
+
+        if input_tokens is not None:
+            target_invocation.input_tokens = input_tokens
+        if output_tokens is not None:
+            target_invocation.output_tokens = output_tokens
+
+    def close_llm_turn(self) -> None:
+        """Close the current LLM invocation span."""
+        if self.current_llm_invocation:
+            self.handler.stop_llm(self.current_llm_invocation)
+            self.last_closed_llm_invocation = self.current_llm_invocation
+            self.current_llm_invocation = None
+
+    def close(self) -> None:
+        """Close any open LLM invocation (cleanup fallback)."""
+        if self.current_llm_invocation:
+            self.handler.stop_llm(self.current_llm_invocation)
+            self.current_llm_invocation = None
+
+
+def _inject_tracing_hooks(options: Any) -> None:
+    """Inject OpenTelemetry tracing hooks into ClaudeAgentOptions."""
+    if not hasattr(options, "hooks"):
+        return
+
+    if options.hooks is None:
+        options.hooks = {}
+
+    if "PreToolUse" not in options.hooks:
+        options.hooks["PreToolUse"] = []
+
+    if "PostToolUse" not in options.hooks:
+        options.hooks["PostToolUse"] = []
+
+    try:
+        otel_pre_matcher = HookMatcher(matcher=None, hooks=[pre_tool_use_hook])
+        otel_post_matcher = HookMatcher(
+            matcher=None, hooks=[post_tool_use_hook]
+        )
+
+        options.hooks["PreToolUse"].insert(0, otel_pre_matcher)
+        options.hooks["PostToolUse"].insert(0, otel_post_matcher)
+        logger.warning("Failed to import HookMatcher from claude_agent_sdk")
+    except Exception as e:
+        logger.warning(f"Failed to inject tracing hooks: {e}")
+
+
+def wrap_claude_client_init(wrapped, instance, args, kwargs, handler=None):
+    """Wrapper for ClaudeSDKClient.__init__ to inject tracing hooks."""
+    if handler is None:
+        logger.warning("Handler not provided, skipping instrumentation")
+        return wrapped(*args, **kwargs)
+
+    options = kwargs.get("options") or (args[0] if args else None)
+    if options:
+        _inject_tracing_hooks(options)
+
+    result = wrapped(*args, **kwargs)
+
+    instance._otel_handler = handler
+    instance._otel_prompt = None
+
+    return result
+
+
+def wrap_claude_client_query(wrapped, instance, args, kwargs, handler=None):
+    """Wrapper for ClaudeSDKClient.query to capture prompt."""
+    if hasattr(instance, "_otel_prompt"):
+        instance._otel_prompt = str(
+            kwargs.get("prompt") or (args[0] if args else "")
+        )
+
+    return wrapped(*args, **kwargs)
+
+
+def _handle_task_subagents(
+    msg: Any,
+    agent_invocation: InvokeAgentInvocation,
+    subagent_sessions: Dict[str, InvokeAgentInvocation],
+    handler: ExtendedTelemetryHandler,
+) -> None:
+    """Process Task tool uses (subagents) in an assistant message."""
+    if not hasattr(msg, "content"):
+        return
+
+    parent_tool_use_id = getattr(msg, "parent_tool_use_id", None)
+
+    for block in msg.content:
+        if type(block).__name__ != "ToolUseBlock":
+            continue
+
+        try:
+            tool_use_id = getattr(block, "id", None)
+            tool_name = getattr(block, "name", "unknown_tool")
+            tool_input = getattr(block, "input", {})
+
+            if not tool_use_id:
+                continue
+
+            # Only handle Task subagents here (Regular tools are handled by hooks)
+            if tool_name == "Task" and not parent_tool_use_id:
+                # Extract subagent name from input
+                subagent_name = (
+                    tool_input.get("subagent_type")
+                    or (
+                        tool_input.get("description", "").split()[0]
+                        if tool_input.get("description")
+                        else None
+                    )
+                    or "unknown-agent"
+                )
+
+                # Create subagent session span
+                subagent_invocation = InvokeAgentInvocation(
+                    provider=infer_provider_from_base_url(),
+                    agent_name=subagent_name,
+                    request_model=agent_invocation.request_model,
+                    conversation_id="",
+                    input_messages=[
+                        InputMessage(
+                            role="user", parts=[Text(content=str(tool_input))]
+                        )
+                    ],
+                    attributes={
+                        "subagent_type": tool_input.get("subagent_type", ""),
+                        "parent_tool_use_id": parent_tool_use_id or "",
+                    },
+                )
+
+                handler.start_invoke_agent(subagent_invocation)
+                subagent_sessions[tool_use_id] = subagent_invocation
+
+                # Mark as client-managed so hooks don't duplicate it
+                _client_managed_runs[tool_use_id] = ExecuteToolInvocation(
+                    tool_name="Task",
+                    tool_call_id=tool_use_id,
+                    tool_call_arguments=tool_input,
+                )
+
+        except Exception as e:
+            logger.warning(f"Failed to create subagent session: {e}")
+
+
+async def wrap_claude_client_receive_response(
+    wrapped, instance, args, kwargs, handler=None
+):
+    """Wrapper for ClaudeSDKClient.receive_response to trace agent invocation."""
+    if handler is None:
+        handler = getattr(instance, "_otel_handler", None)
+
+    if handler is None:
+        logger.warning("Handler not available, skipping instrumentation")
+        async for msg in wrapped(*args, **kwargs):
+            yield msg
+        return
+
+    prompt = getattr(instance, "_otel_prompt", "") or ""
+    model = "unknown"
+    if hasattr(instance, "options") and instance.options:
+        model = getattr(instance.options, "model", "unknown")
+
+    agent_invocation = InvokeAgentInvocation(
+        provider=infer_provider_from_base_url(),
+        agent_name="claude-agent",
+        request_model=model,
+        conversation_id="",
+        input_messages=[
+            InputMessage(role="user", parts=[Text(content=prompt)])
+        ]
+        if prompt
+        else [],
+    )
+
+    handler.start_invoke_agent(agent_invocation)
+    set_parent_invocation(agent_invocation)
+
+    query_start_time = time.time()
+    turn_tracker = AssistantTurnTracker(
+        handler, query_start_time=query_start_time
+    )
+
+    collected_messages: List[Dict[str, Any]] = []
+    subagent_sessions: Dict[str, InvokeAgentInvocation] = {}
+
+    try:
+        async for msg in wrapped(*args, **kwargs):
+            msg_type = type(msg).__name__
+
+            if msg_type == "AssistantMessage":
+                _process_assistant_message(
+                    msg,
+                    model,
+                    prompt,
+                    agent_invocation,
+                    turn_tracker,
+                    handler,
+                    collected_messages,
+                    process_subagents=True,
+                    subagent_sessions=subagent_sessions,
+                )
+
+            elif msg_type == "UserMessage":
+                _process_user_message(
+                    msg, turn_tracker, handler, collected_messages
+                )
+
+            elif msg_type == "ResultMessage":
+                _process_result_message(msg, agent_invocation, turn_tracker)
+
+            yield msg
+
+        handler.stop_invoke_agent(agent_invocation)
+
+        for subagent_invocation in subagent_sessions.values():
+            try:
+                handler.stop_invoke_agent(subagent_invocation)
+            except Exception as e:
+                logger.warning(f"Failed to complete subagent session: {e}")
+
+    except Exception as e:
+        error_msg = str(e)
+        if agent_invocation.span:
+            agent_invocation.span.set_attribute("error.type", type(e).__name__)
+            agent_invocation.span.set_attribute("error.message", error_msg)
+        handler.fail_invoke_agent(
+            agent_invocation, error=Error(message=error_msg, type=type(e))
+        )
+        raise
+    finally:
+        turn_tracker.close()
+        clear_active_tool_runs()
+        clear_parent_invocation()
+
+
+async def wrap_query(wrapped, instance, args, kwargs, handler=None):
+    """Wrapper for claude_agent_sdk.query() standalone function."""
+    if handler is None:
+        logger.warning("Handler not provided, skipping instrumentation")
+        async for message in wrapped(*args, **kwargs):
+            yield message
+        return
+
+    prompt = kwargs.get("prompt") or (args[0] if args else "")
+    options = kwargs.get("options")
+
+    if options:
+        _inject_tracing_hooks(options)
+    elif options is None:
+        try:
+            options = ClaudeAgentOptions()
+            _inject_tracing_hooks(options)
+            kwargs["options"] = options
+        except Exception as e:
+            logger.warning(f"Failed to create ClaudeAgentOptions: {e}")
+
+    model = "unknown"
+    if options:
+        model = getattr(options, "model", "unknown")
+
+    prompt_str = str(prompt) if isinstance(prompt, str) else ""
+    agent_invocation = InvokeAgentInvocation(
+        provider=infer_provider_from_base_url(),
+        agent_name="claude-agent",
+        request_model=model,
+        conversation_id="",
+        input_messages=[
+            InputMessage(role="user", parts=[Text(content=prompt_str)])
+        ]
+        if prompt_str
+        else [],
+    )
+
+    handler.start_invoke_agent(agent_invocation)
+    set_parent_invocation(agent_invocation)
+
+    query_start_time = time.time()
+    turn_tracker = AssistantTurnTracker(
+        handler, query_start_time=query_start_time
+    )
+
+    collected_messages: List[Dict[str, Any]] = []
+
+    try:
+        async for message in wrapped(*args, **kwargs):
+            msg_type = type(message).__name__
+
+            if msg_type == "AssistantMessage":
+                _process_assistant_message(
+                    message,
+                    model,
+                    prompt_str,
+                    agent_invocation,
+                    turn_tracker,
+                    handler,
+                    collected_messages,
+                    process_subagents=False,
+                    subagent_sessions=None,
+                )
+
+            elif msg_type == "UserMessage":
+                _process_user_message(
+                    message, turn_tracker, handler, collected_messages
+                )
+
+            elif msg_type == "ResultMessage":
+                _process_result_message(
+                    message, agent_invocation, turn_tracker
+                )
+
+            yield message
+
+        handler.stop_invoke_agent(agent_invocation)
+
+    except Exception as e:
+        error_msg = str(e)
+        if agent_invocation.span:
+            agent_invocation.span.set_attribute("error.type", type(e).__name__)
+            agent_invocation.span.set_attribute("error.message", error_msg)
+        handler.fail_invoke_agent(
+            agent_invocation, error=Error(message=error_msg, type=type(e))
+        )
+        raise
+    finally:
+        turn_tracker.close()
+        clear_active_tool_runs()
+        clear_parent_invocation()
diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/instrumentation/claude_agent_sdk/utils.py b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/instrumentation/claude_agent_sdk/utils.py
new file mode 100644
index 000000000..29c221f98
--- /dev/null
+++ b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/instrumentation/claude_agent_sdk/utils.py
@@ -0,0 +1,220 @@
+# Copyright The OpenTelemetry Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Utility functions for Claude Agent SDK instrumentation."""
+
+import logging
+import os
+from typing import Any, Dict, List, Optional
+from urllib.parse import urlparse
+
+from opentelemetry.semconv._incubating.attributes.gen_ai_attributes import (
+    GenAiProviderNameValues,
+)
+from opentelemetry.util.genai._extended_semconv.gen_ai_extended_attributes import (
+    GenAiExtendedProviderNameValues,
+)
+from opentelemetry.util.genai.types import Text, ToolCall
+
+logger = logging.getLogger(__name__)
+
+
+def infer_provider_from_base_url(base_url: Optional[str] = None) -> str:
+    """
+    Infer the provider name from ANTHROPIC_BASE_URL environment variable.
+
+    Only recognizes known providers from OpenTelemetry semantic conventions.
+    Returns "anthropic" for unknown providers as these are typically Anthropic-compatible API services.
+
+    Args:
+        base_url: Optional base URL to check. If not provided, reads from ANTHROPIC_BASE_URL env var.
+
+    """
+    if base_url is None:
+        base_url = os.environ.get("ANTHROPIC_BASE_URL", "")
+
+    if not base_url:
+        return GenAiProviderNameValues.ANTHROPIC.value
+
+    try:
+        parsed = urlparse(base_url)
+        hostname = parsed.hostname or ""
+
+        # Check for known providers (order matters: most specific first)
+        if "dashscope" in hostname or "aliyuncs.com" in hostname:
+            return GenAiExtendedProviderNameValues.DASHSCOPE.value
+
+        if "moonshot" in hostname:
+            return GenAiExtendedProviderNameValues.MOONSHOT.value
+
+        return GenAiProviderNameValues.ANTHROPIC.value
+
+    except Exception:
+        return GenAiProviderNameValues.ANTHROPIC.value
+
+
+def extract_message_parts(message: Any) -> List[Any]:
+    """
+    Extract parts (text + tool calls) from an AssistantMessage.
+
+    Args:
+        message: AssistantMessage object
+
+    Returns:
+        List of message parts (Text, ToolCall)
+    """
+    parts = []
+    if not hasattr(message, "content"):
+        return parts
+
+    for block in message.content:
+        block_type = type(block).__name__
+        if block_type == "TextBlock":
+            parts.append(Text(content=getattr(block, "text", "")))
+        elif block_type == "ToolUseBlock":
+            tool_call = ToolCall(
+                id=getattr(block, "id", ""),
+                name=getattr(block, "name", ""),
+                arguments=getattr(block, "input", {}),
+            )
+            parts.append(tool_call)
+
+    return parts
+
+
+def extract_usage_metadata(usage: Any) -> Dict[str, Any]:
+    """
+    Extract and normalize usage metrics from a Claude usage object or dict.
+
+    Only extracts standard OpenTelemetry fields: input_tokens and output_tokens.
+    Cache tokens are extracted temporarily for summing into input_tokens.
+
+    Args:
+        usage: Usage object or dict from Claude API
+
+    Returns:
+        Dict with input_tokens, output_tokens, and temporary cache token fields
+    """
+    if not usage:
+        return {}
+
+    get = (
+        usage.get
+        if isinstance(usage, dict)
+        else lambda k: getattr(usage, k, None)
+    )
+
+    def to_int(value):
+        try:
+            return int(value)
+        except (ValueError, TypeError):
+            return None
+
+    meta: Dict[str, Any] = {}
+
+    # Standard OpenTelemetry fields
+    if (v := to_int(get("input_tokens"))) is not None:
+        meta["input_tokens"] = v
+    if (v := to_int(get("output_tokens"))) is not None:
+        meta["output_tokens"] = v
+
+    # Temporarily extract cache tokens for summing (will be summed by sum_anthropic_tokens)
+    if (v := to_int(get("cache_read_input_tokens"))) is not None:
+        meta["cache_read_input_tokens"] = v
+    if (v := to_int(get("cache_creation_input_tokens"))) is not None:
+        meta["cache_creation_input_tokens"] = v
+
+    return meta
+
+
+def sum_anthropic_tokens(usage_metadata: Dict[str, Any]) -> Dict[str, int]:
+    """
+    Sum Anthropic cache tokens into input_tokens.
+
+    Anthropic returns cache tokens separately (cache_read_input_tokens, cache_creation_input_tokens).
+    This function combines them into the standard input_tokens field for OpenTelemetry reporting.
+
+    Args:
+        usage_metadata: Usage metadata dict with input_tokens, output_tokens, and optional cache tokens
+
+    Returns:
+        Dict with only standard OpenTelemetry fields: input_tokens and output_tokens
+    """
+    # Get standard token counts
+    input_tokens = usage_metadata.get("input_tokens") or 0
+    output_tokens = usage_metadata.get("output_tokens") or 0
+
+    # Get cache tokens (these are temporary fields, not in OpenTelemetry standard)
+    cache_read = usage_metadata.get("cache_read_input_tokens") or 0
+    cache_create = usage_metadata.get("cache_creation_input_tokens") or 0
+
+    # Sum all input tokens (standard + cache)
+    total_input_tokens = input_tokens + cache_read + cache_create
+
+    # Return only standard OpenTelemetry fields
+    return {
+        "input_tokens": total_input_tokens,
+        "output_tokens": output_tokens,
+    }
+
+
+def extract_usage_from_result_message(message: Any) -> Dict[str, Any]:
+    """Normalize and merge token usage metrics from a `ResultMessage`."""
+    if not getattr(message, "usage", None):
+        return {}
+    metrics = extract_usage_metadata(message.usage)
+    return sum_anthropic_tokens(metrics) if metrics else {}
+
+
+def truncate_value(value: Any, max_length: int = 150) -> str:
+    """
+    Truncate a value for display.
+
+    - For strings: truncate with ellipsis
+    - For lists: show first few items
+    - For dicts: show truncated version
+    - For other types: convert to string
+    """
+    if isinstance(value, str):
+        if len(value) <= max_length:
+            return value
+        return value[:max_length] + "..."
+
+    if isinstance(value, list):
+        if len(value) == 0:
+            return "[]"
+        if len(value) <= 3:
+            items_str = ", ".join(
+                truncate_value(item, max_length // 3) for item in value
+            )
+            if len(items_str) <= max_length:
+                return f"[{items_str}]"
+        first_items = ", ".join(
+            truncate_value(item, max_length // 4) for item in value[:2]
+        )
+        return f"[{first_items}, ... ({len(value)} items)]"
+
+    if isinstance(value, dict):
+        if len(value) == 0:
+            return "{}"
+        items = []
+        for i, (k, v) in enumerate(value.items()):
+            if i >= 2:
+                items.append(f"... ({len(value)} keys)")
+                break
+            v_str = truncate_value(v, max_length // 3)
+            items.append(f"{k}: {v_str}")
+        return "{" + ", ".join(items) + "}"
+
+    return str(value)
diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/instrumentation/claude_agent_sdk/version.py b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/instrumentation/claude_agent_sdk/version.py
new file mode 100644
index 000000000..e7844f893
--- /dev/null
+++ b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/instrumentation/claude_agent_sdk/version.py
@@ -0,0 +1,15 @@
+# Copyright The OpenTelemetry Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+__version__ = "0.1.0.dev0"
diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/conftest.py b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/conftest.py
new file mode 100644
index 000000000..b7b42259d
--- /dev/null
+++ b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/conftest.py
@@ -0,0 +1,115 @@
+"""Unit tests configuration module."""
+
+import os
+
+import pytest
+
+# Set up environment variables BEFORE any claude_agent_sdk modules are imported
+# This is critical because claude_agent_sdk reads environment variables at module import time
+if "ANTHROPIC_API_KEY" not in os.environ:
+    # Use DashScope proxy for testing
+    os.environ["ANTHROPIC_BASE_URL"] = (
+        "https://dashscope.aliyuncs.com/apps/anthropic"
+    )
+    os.environ["ANTHROPIC_API_KEY"] = "test_anthropic_api_key"
+    os.environ["DASHSCOPE_API_KEY"] = "test_dashscope_api_key"
+
+# Set GenAI semantic conventions environment variables
+os.environ.setdefault(
+    "OTEL_SEMCONV_STABILITY_OPT_IN", "gen_ai_latest_experimental"
+)
+os.environ.setdefault(
+    "OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT", "SPAN_ONLY"
+)
+
+from opentelemetry.instrumentation._semconv import (
+    OTEL_SEMCONV_STABILITY_OPT_IN,
+    _OpenTelemetrySemanticConventionStability,
+)
+from opentelemetry.instrumentation.claude_agent_sdk import (
+    ClaudeAgentSDKInstrumentor,
+)
+from opentelemetry.sdk.trace import TracerProvider
+from opentelemetry.sdk.trace.export import SimpleSpanProcessor
+from opentelemetry.sdk.trace.export.in_memory_span_exporter import (
+    InMemorySpanExporter,
+)
+from opentelemetry.util.genai.environment_variables import (
+    OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT,
+)
+
+
+@pytest.fixture(scope="function", name="span_exporter")
+def fixture_span_exporter():
+    """Create an in-memory span exporter for testing."""
+    exporter = InMemorySpanExporter()
+    yield exporter
+
+
+@pytest.fixture(scope="function", name="tracer_provider")
+def fixture_tracer_provider(span_exporter):
+    """Create a tracer provider with in-memory exporter."""
+    provider = TracerProvider()
+    provider.add_span_processor(SimpleSpanProcessor(span_exporter))
+    return provider
+
+
+@pytest.fixture(scope="function")
+def instrument(tracer_provider):
+    """Instrument Claude Agent SDK for testing."""
+    instrumentor = ClaudeAgentSDKInstrumentor()
+    instrumentor.instrument(tracer_provider=tracer_provider)
+
+    yield instrumentor
+
+    instrumentor.uninstrument()
+
+
+@pytest.fixture(scope="function")
+def instrument_no_content(tracer_provider):
+    """Instrument Claude Agent SDK with message content capture disabled."""
+    # Reset global state to allow environment variable changes to take effect
+    _OpenTelemetrySemanticConventionStability._initialized = False
+
+    os.environ.update(
+        {
+            OTEL_SEMCONV_STABILITY_OPT_IN: "gen_ai_latest_experimental",
+            OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT: "NO_CONTENT",
+        }
+    )
+
+    instrumentor = ClaudeAgentSDKInstrumentor()
+    instrumentor.instrument(tracer_provider=tracer_provider)
+
+    yield instrumentor
+
+    os.environ.pop(OTEL_SEMCONV_STABILITY_OPT_IN, None)
+    os.environ.pop(OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT, None)
+    instrumentor.uninstrument()
+    # Reset global state after test
+    _OpenTelemetrySemanticConventionStability._initialized = False
+
+
+@pytest.fixture(scope="function")
+def instrument_with_content(tracer_provider):
+    """Instrument Claude Agent SDK with message content capture enabled."""
+    # Reset global state to allow environment variable changes to take effect
+    _OpenTelemetrySemanticConventionStability._initialized = False
+
+    os.environ.update(
+        {
+            OTEL_SEMCONV_STABILITY_OPT_IN: "gen_ai_latest_experimental",
+            OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT: "SPAN_ONLY",
+        }
+    )
+
+    instrumentor = ClaudeAgentSDKInstrumentor()
+    instrumentor.instrument(tracer_provider=tracer_provider)
+
+    yield instrumentor
+
+    os.environ.pop(OTEL_SEMCONV_STABILITY_OPT_IN, None)
+    os.environ.pop(OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT, None)
+    instrumentor.uninstrument()
+    # Reset global state after test
+    _OpenTelemetrySemanticConventionStability._initialized = False
diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/requirements.latest.txt b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/requirements.latest.txt
new file mode 100644
index 000000000..6dd17a455
--- /dev/null
+++ b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/requirements.latest.txt
@@ -0,0 +1,26 @@
+# Copyright The OpenTelemetry Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# This variant of the requirements aims to test the system using
+# the latest available versions of external dependencies.
+
+claude-agent-sdk>=0.1.0
+pytest
+pytest-asyncio
+wrapt
+opentelemetry-exporter-otlp-proto-http
+
+-e instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk
+-e util/opentelemetry-util-genai
+
diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/requirements.oldest.txt b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/requirements.oldest.txt
new file mode 100644
index 000000000..e642ae450
--- /dev/null
+++ b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/requirements.oldest.txt
@@ -0,0 +1,29 @@
+# Copyright The OpenTelemetry Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# This variant of the requirements aims to test the system using
+# the oldest supported version of external dependencies.
+
+claude-agent-sdk>=0.1.0
+pytest==7.4.4
+pytest-asyncio==0.21.0
+wrapt==1.17.3
+opentelemetry-exporter-otlp-proto-http~=1.30
+opentelemetry-api==1.37
+opentelemetry-sdk==1.37
+opentelemetry-semantic-conventions==0.58b0
+
+-e instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk
+-e util/opentelemetry-util-genai
+
diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/test_attributes.py b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/test_attributes.py
new file mode 100644
index 000000000..a5aaecace
--- /dev/null
+++ b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/test_attributes.py
@@ -0,0 +1,313 @@
+"""Configuration and attribute tests for Claude Agent SDK instrumentation."""
+
+import asyncio
+
+import pytest
+
+from opentelemetry.instrumentation import claude_agent_sdk
+from opentelemetry.instrumentation.claude_agent_sdk import (
+    ClaudeAgentSDKInstrumentor,
+    __version__,
+    hooks,
+    utils,
+)
+from opentelemetry.sdk.metrics import MeterProvider
+from opentelemetry.semconv._incubating.attributes import (
+    gen_ai_attributes as GenAIAttributes,
+)
+
+
+@pytest.mark.asyncio
+async def test_span_attributes_semantic_conventions(instrument, span_exporter):
+    """Test that all spans follow semantic conventions."""
+    from claude_agent_sdk import query  # noqa: PLC0415
+    from claude_agent_sdk.types import ClaudeAgentOptions  # noqa: PLC0415
+
+    options = ClaudeAgentOptions(
+        model="qwen-plus",
+        max_turns=1,
+    )
+
+    async for _ in query(prompt="Hello", options=options):
+        pass
+
+    spans = span_exporter.get_finished_spans()
+
+    for span in spans:
+        # All spans should have a name
+        assert span.name is not None
+        assert len(span.name) > 0
+
+        # Spans should have proper status
+        assert span.status is not None
+
+        # Check if it's an LLM span
+        if GenAIAttributes.GEN_AI_OPERATION_NAME in span.attributes:
+            operation = span.attributes[GenAIAttributes.GEN_AI_OPERATION_NAME]
+
+            if operation == "chat":
+                # LLM spans must have provider
+                assert GenAIAttributes.GEN_AI_PROVIDER_NAME in span.attributes
+                # LLM spans must have model
+                assert GenAIAttributes.GEN_AI_REQUEST_MODEL in span.attributes
+
+
+@pytest.mark.asyncio
+async def test_agent_span_naming_convention(instrument, span_exporter):
+    """Test agent span naming follows conventions."""
+    from claude_agent_sdk import query  # noqa: PLC0415
+    from claude_agent_sdk.types import ClaudeAgentOptions  # noqa: PLC0415
+
+    options = ClaudeAgentOptions(
+        model="qwen-plus",
+        max_turns=1,
+    )
+
+    async for _ in query(prompt="Test", options=options):
+        pass
+
+    spans = span_exporter.get_finished_spans()
+    agent_spans = [s for s in spans if "invoke_agent" in s.name]
+
+    assert len(agent_spans) >= 1
+    agent_span = agent_spans[0]
+
+    # Agent span name should contain agent name
+    assert (
+        "claude-agent" in agent_span.name or "invoke_agent" in agent_span.name
+    )
+
+
+@pytest.mark.asyncio
+async def test_llm_span_naming_convention(instrument, span_exporter):
+    """Test LLM span naming follows conventions."""
+    from claude_agent_sdk import query  # noqa: PLC0415
+    from claude_agent_sdk.types import ClaudeAgentOptions  # noqa: PLC0415
+
+    options = ClaudeAgentOptions(
+        model="qwen-plus",
+        max_turns=1,
+    )
+
+    async for _ in query(prompt="Test", options=options):
+        pass
+
+    spans = span_exporter.get_finished_spans()
+    llm_spans = [
+        s
+        for s in spans
+        if GenAIAttributes.GEN_AI_OPERATION_NAME in s.attributes
+    ]
+
+    assert len(llm_spans) >= 1
+    llm_span = llm_spans[0]
+
+    # LLM span name should follow pattern: "{operation} {model}"
+    assert "chat" in llm_span.name
+    assert "qwen" in llm_span.name.lower() or "qwen-plus" in llm_span.name
+
+
+@pytest.mark.asyncio
+async def test_tool_span_naming_convention(instrument, span_exporter):
+    """Test tool span naming follows conventions."""
+    from claude_agent_sdk import query  # noqa: PLC0415
+    from claude_agent_sdk.types import ClaudeAgentOptions  # noqa: PLC0415
+
+    options = ClaudeAgentOptions(
+        model="qwen-plus",
+        allowed_tools=["Write"],
+        max_turns=2,
+    )
+
+    async for _ in query(
+        prompt="Create a file test.txt with content 'test'", options=options
+    ):
+        pass
+
+    spans = span_exporter.get_finished_spans()
+    tool_spans = [s for s in spans if "execute_tool" in s.name]
+
+    if tool_spans:
+        tool_span = tool_spans[0]
+        # Tool span should have tool name in name
+        assert "execute_tool" in tool_span.name
+
+
+@pytest.mark.asyncio
+async def test_span_context_propagation(instrument, span_exporter):
+    """Test that span context is properly propagated."""
+    from claude_agent_sdk import query  # noqa: PLC0415
+    from claude_agent_sdk.types import ClaudeAgentOptions  # noqa: PLC0415
+
+    options = ClaudeAgentOptions(
+        model="qwen-plus",
+        max_turns=1,
+    )
+
+    async for _ in query(prompt="Test", options=options):
+        pass
+
+    spans = span_exporter.get_finished_spans()
+
+    # Find agent span
+    agent_spans = [s for s in spans if "invoke_agent" in s.name]
+    if not agent_spans:
+        return  # No agent span, skip
+
+    agent_span = agent_spans[0]
+    agent_span_id = agent_span.context.span_id
+
+    # All other spans should have the agent span as parent
+    for span in spans:
+        if span != agent_span and span.parent:
+            # Parent should be agent span
+            assert span.parent.span_id == agent_span_id
+
+
+@pytest.mark.asyncio
+async def test_token_usage_attributes(instrument, span_exporter):
+    """Test that token usage attributes are captured."""
+    from claude_agent_sdk import query  # noqa: PLC0415
+    from claude_agent_sdk.types import ClaudeAgentOptions  # noqa: PLC0415
+
+    options = ClaudeAgentOptions(
+        model="qwen-plus",
+        max_turns=1,
+    )
+
+    async for _ in query(prompt="What is AI?", options=options):
+        pass
+
+    spans = span_exporter.get_finished_spans()
+    llm_spans = [
+        s
+        for s in spans
+        if GenAIAttributes.GEN_AI_OPERATION_NAME in s.attributes
+    ]
+
+    if llm_spans:
+        llm_span = llm_spans[0]
+
+        # Should have token usage (might not always be present)
+        # Just check the structure is correct if present
+        if GenAIAttributes.GEN_AI_USAGE_INPUT_TOKENS in llm_span.attributes:
+            input_tokens = llm_span.attributes[
+                GenAIAttributes.GEN_AI_USAGE_INPUT_TOKENS
+            ]
+            assert isinstance(input_tokens, int)
+            assert input_tokens >= 0
+
+        if GenAIAttributes.GEN_AI_USAGE_OUTPUT_TOKENS in llm_span.attributes:
+            output_tokens = llm_span.attributes[
+                GenAIAttributes.GEN_AI_USAGE_OUTPUT_TOKENS
+            ]
+            assert isinstance(output_tokens, int)
+            assert output_tokens >= 0
+
+
+def test_instrumentor_dependencies(instrument):
+    """Test that instrumentor declares dependencies correctly."""
+    instrumentor = ClaudeAgentSDKInstrumentor()
+    deps = instrumentor.instrumentation_dependencies()
+
+    # Should have claude-agent-sdk as dependency
+    assert len(deps) > 0
+    assert any("claude-agent-sdk" in dep for dep in deps)
+
+
+def test_instrumentor_with_custom_providers(tracer_provider, span_exporter):
+    """Test instrumentor with custom tracer and meter providers."""
+    instrumentor = ClaudeAgentSDKInstrumentor()
+    meter_provider = MeterProvider()
+
+    # Should accept custom providers
+    instrumentor.instrument(
+        tracer_provider=tracer_provider,
+        meter_provider=meter_provider,
+    )
+
+    instrumentor.uninstrument()
+
+
+def test_version_exported():
+    """Test that version is exported."""
+    assert __version__ is not None
+    assert isinstance(__version__, str)
+    assert len(__version__) > 0
+
+
+def test_instrumentor_class_exported():
+    """Test that ClaudeAgentSDKInstrumentor is exported."""
+    assert hasattr(claude_agent_sdk, "ClaudeAgentSDKInstrumentor")
+    assert hasattr(claude_agent_sdk, "__version__")
+
+
+@pytest.mark.asyncio
+async def test_multiple_concurrent_queries(instrument, span_exporter):
+    """Test that multiple concurrent queries are handled correctly."""
+    from claude_agent_sdk import query  # noqa: PLC0415
+    from claude_agent_sdk.types import ClaudeAgentOptions  # noqa: PLC0415
+
+    options = ClaudeAgentOptions(
+        model="qwen-plus",
+        max_turns=1,
+    )
+
+    async def run_query(prompt):
+        async for _ in query(prompt=prompt, options=options):
+            pass
+
+    # Run multiple queries concurrently
+    await asyncio.gather(
+        run_query("What is 1+1?"),
+        run_query("What is 2+2?"),
+    )
+
+    spans = span_exporter.get_finished_spans()
+
+    # Should have spans from both queries
+    # At least 2 agent spans
+    agent_spans = [s for s in spans if "invoke_agent" in s.name]
+    assert len(agent_spans) >= 2
+
+
+@pytest.mark.asyncio
+async def test_span_attributes_no_sensitive_data(
+    instrument_no_content, span_exporter
+):
+    """Test that sensitive data is not captured when content capture is disabled."""
+    from claude_agent_sdk import query  # noqa: PLC0415
+    from claude_agent_sdk.types import ClaudeAgentOptions  # noqa: PLC0415
+
+    sensitive_prompt = "My password is secret123"
+
+    options = ClaudeAgentOptions(
+        model="qwen-plus",
+        max_turns=1,
+    )
+
+    async for _ in query(prompt=sensitive_prompt, options=options):
+        pass
+
+    spans = span_exporter.get_finished_spans()
+
+    # Check that sensitive data is not in any span attributes
+    for span in spans:
+        for attr_value in span.attributes.values():
+            if isinstance(attr_value, str):
+                # Sensitive content should not be in attributes
+                assert "secret123" not in attr_value.lower()
+
+
+def test_hooks_are_exported():
+    """Test that hooks are exported for external use."""
+    # Check internal hooks can be imported
+    assert hasattr(hooks, "pre_tool_use_hook")
+    assert hasattr(hooks, "post_tool_use_hook")
+
+
+def test_utils_are_internal():
+    """Test that utils are properly organized."""
+    # Utils should have the helper functions
+    assert hasattr(utils, "truncate_value")
+    assert hasattr(utils, "extract_usage_metadata")
diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/test_edge_cases.py b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/test_edge_cases.py
new file mode 100644
index 000000000..54befef26
--- /dev/null
+++ b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/test_edge_cases.py
@@ -0,0 +1,353 @@
+"""Error handling and edge case tests for Claude Agent SDK instrumentation."""
+
+import pytest
+
+
+@pytest.mark.asyncio
+async def test_query_with_api_error(instrument, span_exporter):
+    """Test that API errors are properly captured in spans."""
+    from claude_agent_sdk import query  # noqa: PLC0415
+    from claude_agent_sdk.types import ClaudeAgentOptions  # noqa: PLC0415
+
+    options = ClaudeAgentOptions(
+        model="qwen-plus",
+        max_turns=1,
+    )
+
+    # Try a query that might fail (invalid prompt or rate limit)
+    try:
+        async for _ in query(prompt="", options=options):
+            pass
+    except Exception:
+        pass  # Expected to fail
+
+    # Get spans
+    spans = span_exporter.get_finished_spans()
+
+    # Should still have spans even on error
+    assert len(spans) >= 0
+
+
+@pytest.mark.asyncio
+async def test_query_with_empty_prompt(instrument, span_exporter):
+    """Test behavior with empty prompt."""
+    from claude_agent_sdk import query  # noqa: PLC0415
+    from claude_agent_sdk.types import ClaudeAgentOptions  # noqa: PLC0415
+
+    options = ClaudeAgentOptions(
+        model="qwen-plus",
+        max_turns=1,
+    )
+
+    # Empty prompt should still be tracked
+    try:
+        count = 0
+        async for _ in query(prompt="", options=options):
+            count += 1
+            if count > 5:  # Prevent infinite loop
+                break
+    except Exception:
+        pass
+
+
+@pytest.mark.asyncio
+async def test_client_context_manager_exception(instrument, span_exporter):
+    """Test that exceptions in context manager are handled."""
+    from claude_agent_sdk import ClaudeSDKClient  # noqa: PLC0415
+    from claude_agent_sdk.types import ClaudeAgentOptions  # noqa: PLC0415
+
+    options = ClaudeAgentOptions(model="qwen-plus")
+
+    try:
+        async with ClaudeSDKClient(options=options) as client:
+            await client.query(prompt="test")
+            # Simulate an error
+            raise RuntimeError("Simulated error")
+    except RuntimeError:
+        pass  # Expected
+
+    # Spans should still be exported
+    spans = span_exporter.get_finished_spans()
+    assert len(spans) >= 0
+
+
+def test_instrumentor_with_invalid_tracer_provider():
+    """Test instrumentor with invalid tracer provider."""
+    from opentelemetry.instrumentation.claude_agent_sdk import (  # noqa: PLC0415
+        ClaudeAgentSDKInstrumentor,
+    )
+
+    instrumentor = ClaudeAgentSDKInstrumentor()
+
+    # Should handle invalid provider gracefully
+    instrumentor.instrument(tracer_provider=None)
+    instrumentor.uninstrument()
+
+
+def test_instrumentor_multiple_instrument_uninstrument_cycles():
+    """Test multiple instrument/uninstrument cycles."""
+    from opentelemetry.instrumentation.claude_agent_sdk import (  # noqa: PLC0415
+        ClaudeAgentSDKInstrumentor,
+    )
+    from opentelemetry.sdk.trace import TracerProvider  # noqa: PLC0415
+
+    instrumentor = ClaudeAgentSDKInstrumentor()
+    tracer_provider = TracerProvider()
+
+    # Multiple cycles should not cause issues
+    for _ in range(3):
+        instrumentor.instrument(tracer_provider=tracer_provider)
+        instrumentor.uninstrument()
+
+
+def test_hook_with_none_tool_use_id(instrument):
+    """Test hook behavior with None tool_use_id."""
+    import asyncio  # noqa: PLC0415
+
+    from opentelemetry.instrumentation.claude_agent_sdk.hooks import (  # noqa: PLC0415
+        post_tool_use_hook,
+        pre_tool_use_hook,
+    )
+
+    # Pre hook with None ID
+    result = asyncio.run(
+        pre_tool_use_hook(
+            {"tool_name": "test", "tool_input": {}},
+            None,  # tool_use_id is None
+            {},
+        )
+    )
+    assert result == {}
+
+    # Post hook with None ID
+    result = asyncio.run(
+        post_tool_use_hook(
+            {"tool_name": "test", "tool_response": "ok"},
+            None,  # tool_use_id is None
+            {},
+        )
+    )
+    assert result == {}
+
+
+def test_hook_with_empty_input_data(instrument):
+    """Test hook behavior with empty input data."""
+    import asyncio  # noqa: PLC0415
+
+    from opentelemetry.instrumentation.claude_agent_sdk.hooks import (  # noqa: PLC0415
+        post_tool_use_hook,
+        pre_tool_use_hook,
+    )
+
+    # Pre hook with empty data
+    result = asyncio.run(
+        pre_tool_use_hook(
+            {},  # empty input
+            "test-id",
+            {},
+        )
+    )
+    assert result == {}
+
+    # Post hook with empty data
+    result = asyncio.run(
+        post_tool_use_hook(
+            {},  # empty input
+            "test-id",
+            {},
+        )
+    )
+    assert result == {}
+
+
+def test_context_clear_when_not_set():
+    """Test clearing context when nothing is set."""
+    from opentelemetry.instrumentation.claude_agent_sdk.context import (  # noqa: PLC0415
+        clear_parent_invocation,
+        get_parent_invocation,
+    )
+
+    # Clear when empty
+    clear_parent_invocation()
+
+    # Should return None
+    assert get_parent_invocation() is None
+
+    # Clear again (should not raise)
+    clear_parent_invocation()
+
+
+def test_context_set_with_none():
+    """Test setting context with None value."""
+    from opentelemetry.instrumentation.claude_agent_sdk.context import (  # noqa: PLC0415
+        clear_parent_invocation,
+        get_parent_invocation,
+        set_parent_invocation,
+    )
+
+    # Set to None
+    set_parent_invocation(None)
+
+    # Should retrieve None
+    assert get_parent_invocation() is None
+
+    clear_parent_invocation()
+
+
+def test_clear_active_tool_runs_with_empty_runs():
+    """Test clearing tool runs when there are none."""
+    from opentelemetry.instrumentation.claude_agent_sdk.hooks import (  # noqa: PLC0415
+        clear_active_tool_runs,
+    )
+
+    # Should not raise even if no active runs
+    clear_active_tool_runs()
+
+
+def test_clear_active_tool_runs_multiple_times():
+    """Test clearing tool runs multiple times."""
+    from opentelemetry.instrumentation.claude_agent_sdk.hooks import (  # noqa: PLC0415
+        clear_active_tool_runs,
+    )
+
+    # Multiple clears should be safe
+    for _ in range(3):
+        clear_active_tool_runs()
+
+
+def test_utils_truncate_with_zero_max_length():
+    """Test truncate with zero max length."""
+    from opentelemetry.instrumentation.claude_agent_sdk.utils import (  # noqa: PLC0415
+        truncate_value,
+    )
+
+    result = truncate_value("hello", max_length=0)
+    # Should handle gracefully
+    assert isinstance(result, str)
+
+
+def test_utils_truncate_with_negative_max_length():
+    """Test truncate with negative max length."""
+    from opentelemetry.instrumentation.claude_agent_sdk.utils import (  # noqa: PLC0415
+        truncate_value,
+    )
+
+    result = truncate_value("hello", max_length=-1)
+    # Should handle gracefully
+    assert isinstance(result, str)
+
+
+def test_utils_truncate_with_circular_reference():
+    """Test truncate handles circular references."""
+    from opentelemetry.instrumentation.claude_agent_sdk.utils import (  # noqa: PLC0415
+        truncate_value,
+    )
+
+    # Create circular reference
+    a = {}
+    b = {"parent": a}
+    a["child"] = b
+
+    # Should not infinite loop - will raise RecursionError which is expected
+    try:
+        result = truncate_value(a, max_length=100)
+        # If it doesn't raise, it should return a string
+        assert isinstance(result, str)
+    except RecursionError:
+        # This is acceptable - circular references are edge cases
+        pass
+
+
+def test_utils_extract_usage_with_non_numeric_strings():
+    """Test usage extraction with string values."""
+    from opentelemetry.instrumentation.claude_agent_sdk.utils import (  # noqa: PLC0415
+        extract_usage_metadata,
+    )
+
+    usage = {
+        "input_tokens": "100",
+        "output_tokens": "50",
+    }
+
+    result = extract_usage_metadata(usage)
+    # Should attempt to convert strings to int
+    assert isinstance(result, dict)
+
+
+def test_utils_sum_tokens_with_none_values():
+    """Test token summation with None values."""
+    from opentelemetry.instrumentation.claude_agent_sdk.utils import (  # noqa: PLC0415
+        sum_anthropic_tokens,
+    )
+
+    usage = {
+        "input_tokens": None,
+        "output_tokens": None,
+    }
+
+    result = sum_anthropic_tokens(usage)
+    # Should handle None values - converts to 0
+    assert result["input_tokens"] == 0
+    assert result["output_tokens"] == 0
+
+
+def test_utils_sum_tokens_with_negative_values():
+    """Test token summation with negative values."""
+    from opentelemetry.instrumentation.claude_agent_sdk.utils import (  # noqa: PLC0415
+        sum_anthropic_tokens,
+    )
+
+    usage = {
+        "input_tokens": -10,
+        "output_tokens": 50,
+    }
+
+    result = sum_anthropic_tokens(usage)
+    # Should process even if values are negative
+    assert result["input_tokens"] == -10
+    assert result["output_tokens"] == 50
+
+
+@pytest.mark.asyncio
+async def test_query_with_very_long_prompt(instrument, span_exporter):
+    """Test query with very long prompt."""
+    from claude_agent_sdk import query  # noqa: PLC0415
+    from claude_agent_sdk.types import ClaudeAgentOptions  # noqa: PLC0415
+
+    options = ClaudeAgentOptions(
+        model="qwen-plus",
+        max_turns=1,
+    )
+
+    # Very long prompt
+    long_prompt = "test " * 1000
+
+    try:
+        count = 0
+        async for _ in query(prompt=long_prompt, options=options):
+            count += 1
+            if count > 5:
+                break
+    except Exception:
+        pass  # May fail due to token limits
+
+    # Should still create spans
+    spans = span_exporter.get_finished_spans()
+    assert len(spans) >= 0
+
+
+def test_patch_with_missing_module():
+    """Test that instrumentation handles missing SDK gracefully."""
+    from opentelemetry.instrumentation.claude_agent_sdk import (  # noqa: PLC0415
+        ClaudeAgentSDKInstrumentor,
+    )
+    from opentelemetry.sdk.trace import TracerProvider  # noqa: PLC0415
+
+    instrumentor = ClaudeAgentSDKInstrumentor()
+
+    # Even if SDK is not installed properly, should not crash
+    try:
+        instrumentor.instrument(tracer_provider=TracerProvider())
+        instrumentor.uninstrument()
+    except Exception:
+        pass  # Expected if SDK is not installed
diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/test_integration.py b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/test_integration.py
new file mode 100644
index 000000000..c4fa4cb97
--- /dev/null
+++ b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/test_integration.py
@@ -0,0 +1,183 @@
+"""Integration tests using mocked SDK client to avoid API calls.
+
+These tests mock the Claude Agent SDK at a lower level to simulate
+realistic scenarios without requiring API keys.
+"""
+
+import asyncio
+from unittest.mock import AsyncMock, Mock, patch
+
+import pytest
+
+from opentelemetry.instrumentation.claude_agent_sdk import (
+    ClaudeAgentSDKInstrumentor,
+)
+from opentelemetry.instrumentation.claude_agent_sdk.context import (
+    clear_parent_invocation,
+    get_parent_invocation,
+    set_parent_invocation,
+)
+from opentelemetry.instrumentation.claude_agent_sdk.hooks import (
+    post_tool_use_hook,
+    pre_tool_use_hook,
+)
+from opentelemetry.instrumentation.claude_agent_sdk.utils import (
+    extract_usage_metadata,
+    sum_anthropic_tokens,
+    truncate_value,
+)
+from opentelemetry.sdk.metrics import MeterProvider
+
+
+@pytest.mark.asyncio
+async def test_client_with_mocked_response(instrument, span_exporter):
+    """Test client instrumentation with fully mocked SDK."""
+    from claude_agent_sdk import ClaudeSDKClient  # noqa: PLC0415
+    from claude_agent_sdk.types import ClaudeAgentOptions  # noqa: PLC0415
+
+    # Create a mock response
+    mock_msg = Mock()
+    mock_msg.content = [Mock(text="Mocked response", type="text")]
+    mock_msg.usage = Mock(
+        input_tokens=50,
+        output_tokens=10,
+        cache_read_input_tokens=0,
+        cache_creation_input_tokens=0,
+    )
+
+    options = ClaudeAgentOptions(model="qwen-plus")
+
+    # Mock the underlying client query method
+    with patch.object(
+        ClaudeSDKClient, "query", new_callable=AsyncMock
+    ) as mock_query:
+        mock_query.return_value = [mock_msg]
+
+        async with ClaudeSDKClient(options=options) as client:
+            result = await client.query(prompt="Test")
+            assert result is not None
+
+    # Verify spans were created
+    spans = span_exporter.get_finished_spans()
+    # Note: spans might not be created if instrumentation doesn't hook into the mocked method
+    # This is expected behavior for this type of test
+    assert isinstance(spans, (list, tuple))
+
+
+@pytest.mark.asyncio
+async def test_instrumentor_doesnt_crash_with_mocks(instrument, span_exporter):
+    """Test that instrumentor doesn't crash even with mock objects."""
+    # This test验证instrumentation可以安全处理mock对象
+    mock_msg = Mock()
+    mock_msg.content = []
+    mock_msg.usage = None
+
+    # 使用instrumented环境处理mock对象不应该崩溃
+    try:
+        # Simulate what instrumentation might do
+        if hasattr(mock_msg, "usage") and mock_msg.usage:
+            pass  # Would extract usage
+        if hasattr(mock_msg, "content"):
+            pass  # Would process content
+    except Exception as e:
+        pytest.fail(f"Instrumentation crashed with mock object: {e}")
+
+    # Should complete without error
+    assert True
+
+
+def test_utils_work_with_mock_data(instrument):
+    """Test that utility functions work with mock data."""
+    # Test with mock usage object
+    mock_usage = Mock()
+    mock_usage.input_tokens = 100
+    mock_usage.output_tokens = 50
+
+    usage_data = extract_usage_metadata(mock_usage)
+    assert usage_data["input_tokens"] == 100
+    assert usage_data["output_tokens"] == 50
+
+    # Test token summation
+    summed = sum_anthropic_tokens(usage_data)
+    assert summed["input_tokens"] == 100
+    assert summed["output_tokens"] == 50
+
+    # Test truncation
+    truncated = truncate_value("test" * 100, max_length=50)
+    assert len(truncated) <= 53  # 50 + "..."
+
+
+def test_context_operations_isolated(instrument):
+    """Test context operations work in isolated test environment."""
+    # Set and retrieve
+    test_value = "test_invocation_123"
+    set_parent_invocation(test_value)
+    assert get_parent_invocation() == test_value
+
+    # Clear
+    clear_parent_invocation()
+    assert get_parent_invocation() is None
+
+
+def test_hooks_can_be_called_directly(instrument):
+    """Test that hooks can be called directly without crashing."""
+    # Call pre hook
+    tool_data = {
+        "tool_name": "TestTool",
+        "tool_input": {"param": "value"},
+    }
+
+    try:
+        result = asyncio.run(pre_tool_use_hook(tool_data, "tool_123", {}))
+        assert isinstance(result, dict)
+    except Exception as e:
+        # Hook might need full context, but shouldn't crash hard
+        print(f"Hook raised: {e}")
+
+    # Call post hook
+    result_data = {
+        "tool_name": "TestTool",
+        "tool_response": "success",
+    }
+
+    try:
+        result = asyncio.run(post_tool_use_hook(result_data, "tool_123", {}))
+        assert isinstance(result, dict)
+    except Exception as e:
+        print(f"Hook raised: {e}")
+
+
+def test_instrumentor_lifecycle_complete(tracer_provider):
+    """Test complete instrumentor lifecycle."""
+    instrumentor = ClaudeAgentSDKInstrumentor()
+
+    # Instrument
+    instrumentor.instrument(tracer_provider=tracer_provider)
+    assert instrumentor._handler is not None
+
+    # Uninstrument
+    instrumentor.uninstrument()
+    assert instrumentor._handler is None
+
+    # Re-instrument
+    instrumentor.instrument(tracer_provider=tracer_provider)
+    assert instrumentor._handler is not None
+
+    # Final cleanup
+    instrumentor.uninstrument()
+
+
+def test_instrumentation_with_different_configs(tracer_provider):
+    """Test instrumentation with different configurations."""
+    instrumentor = ClaudeAgentSDKInstrumentor()
+    meter_provider = MeterProvider()
+
+    # With both providers
+    instrumentor.instrument(
+        tracer_provider=tracer_provider,
+        meter_provider=meter_provider,
+    )
+
+    assert instrumentor._handler is not None
+
+    instrumentor.uninstrument()
diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/test_mocks.py b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/test_mocks.py
new file mode 100644
index 000000000..4cd9fe073
--- /dev/null
+++ b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/test_mocks.py
@@ -0,0 +1,244 @@
+"""Mock-based tests for Claude Agent SDK instrumentation."""
+
+import threading
+from unittest.mock import Mock, patch
+
+import pytest
+
+from opentelemetry.instrumentation.claude_agent_sdk import (
+    ClaudeAgentSDKInstrumentor,
+)
+from opentelemetry.instrumentation.claude_agent_sdk.context import (
+    clear_parent_invocation,
+    get_parent_invocation,
+    set_parent_invocation,
+)
+from opentelemetry.instrumentation.claude_agent_sdk.utils import (
+    extract_usage_from_result_message,
+    extract_usage_metadata,
+    sum_anthropic_tokens,
+    truncate_value,
+)
+from opentelemetry.semconv._incubating.attributes import (
+    gen_ai_attributes as GenAIAttributes,
+)
+
+
+@pytest.mark.asyncio
+async def test_agent_span_attributes_complete(instrument, span_exporter):
+    """Test that agent span has all required attributes."""
+    from claude_agent_sdk import query  # noqa: PLC0415
+    from claude_agent_sdk.types import (  # noqa: PLC0415
+        AssistantMessage,
+        ClaudeAgentOptions,
+        TextBlock,
+    )
+
+    # Mock the query function to return controlled data
+    with patch("claude_agent_sdk.query") as mock_query:
+        # Create mock messages
+        mock_assistant_msg = Mock(spec=AssistantMessage)
+        mock_assistant_msg.content = [Mock(spec=TextBlock, text="4")]
+
+        async def mock_generator(*args, **kwargs):
+            yield mock_assistant_msg
+
+        mock_query.return_value = mock_generator()
+
+        # Execute with instrumentation
+        options = ClaudeAgentOptions(model="qwen-plus")
+        messages = []
+        async for msg in query(prompt="2+2?", options=options):
+            messages.append(msg)
+
+    # Get spans
+    spans = span_exporter.get_finished_spans()
+    assert len(spans) > 0
+
+    # Find agent span
+    agent_spans = [s for s in spans if "invoke_agent" in s.name]
+    if agent_spans:
+        agent_span = agent_spans[0]
+
+        # Verify all semantic convention attributes
+        assert GenAIAttributes.GEN_AI_PROVIDER_NAME in agent_span.attributes
+        assert GenAIAttributes.GEN_AI_REQUEST_MODEL in agent_span.attributes
+
+
+def test_utils_extract_usage_with_none(instrument):
+    """Test usage extraction with None input."""
+    result = extract_usage_metadata(None)
+    assert result == {}
+
+
+def test_utils_extract_usage_with_empty_dict(instrument):
+    """Test usage extraction with empty dict."""
+    result = extract_usage_metadata({})
+    assert result == {}
+
+
+def test_utils_extract_usage_with_invalid_values(instrument):
+    """Test usage extraction with invalid values."""
+    usage = {
+        "input_tokens": "invalid",
+        "output_tokens": None,
+        "cache_read_input_tokens": "not_a_number",
+    }
+
+    result = extract_usage_metadata(usage)
+    # Should handle invalid values gracefully
+    assert isinstance(result, dict)
+
+
+def test_utils_sum_tokens_with_missing_fields(instrument):
+    """Test token summation with missing fields."""
+    # Missing output_tokens - should default to 0
+    result = sum_anthropic_tokens({"input_tokens": 100})
+    assert result["input_tokens"] == 100
+    assert result["output_tokens"] == 0
+
+    # Missing input_tokens - should default to 0
+    result = sum_anthropic_tokens({"output_tokens": 50})
+    assert result["input_tokens"] == 0
+    assert result["output_tokens"] == 50
+
+
+def test_utils_sum_tokens_with_cache_details(instrument):
+    """Test token summation with cache details in different formats."""
+    # Note: Current implementation doesn't support nested input_token_details
+    # It only reads top-level cache_read_input_tokens and cache_creation_input_tokens
+
+    # Format 1: nested input_token_details (NOT supported yet)
+    usage1 = {
+        "input_tokens": 100,
+        "output_tokens": 50,
+        "input_token_details": {
+            "cache_read": 10,
+            "cache_creation": 5,
+        },
+    }
+    result1 = sum_anthropic_tokens(usage1)
+    # Since nested format is not supported, only gets base input_tokens
+    assert result1["input_tokens"] == 100  # No cache added
+    assert result1["output_tokens"] == 50
+
+    # Format 2: flat cache fields (supported)
+    usage2 = {
+        "input_tokens": 100,
+        "output_tokens": 50,
+        "cache_read_input_tokens": 10,
+        "cache_creation_input_tokens": 5,
+    }
+    result2 = sum_anthropic_tokens(usage2)
+    assert result2["input_tokens"] == 115  # 100 + 10 + 5
+    assert result2["output_tokens"] == 50
+
+
+def test_utils_smart_truncate_edge_cases(instrument):
+    """Test smart truncate with various edge cases."""
+    # Empty string
+    assert truncate_value("") == ""
+
+    # None
+    assert truncate_value(None) == "None"
+
+    # Numbers
+    assert truncate_value(42) == "42"
+    assert truncate_value(3.14) == "3.14"
+
+    # Boolean
+    assert truncate_value(True) == "True"
+
+    # Empty list
+    assert truncate_value([]) == "[]"
+
+    # Empty dict
+    assert truncate_value({}) == "{}"
+
+    # Nested structures
+    nested = {"a": {"b": {"c": [1, 2, 3]}}}
+    result = truncate_value(nested)
+    assert isinstance(result, str)
+    assert "{" in result
+
+
+def test_context_thread_safety(instrument):
+    """Test context operations are thread-safe."""
+    results = []
+
+    def thread_func(value):
+        set_parent_invocation(value)
+        retrieved = get_parent_invocation()
+        results.append(retrieved == value)
+        clear_parent_invocation()
+
+    threads = []
+    for i in range(5):
+        t = threading.Thread(target=thread_func, args=(f"invocation_{i}",))
+        threads.append(t)
+        t.start()
+
+    for t in threads:
+        t.join()
+
+    # Each thread should have retrieved its own value
+    assert all(results)
+
+
+def test_instrumentor_double_instrument(instrument, tracer_provider):
+    """Test that double instrumentation doesn't cause issues."""
+    # First instrumentation already done by fixture
+    # Try to instrument again
+    instrumentor2 = ClaudeAgentSDKInstrumentor()
+    instrumentor2.instrument(tracer_provider=tracer_provider)
+
+    # Should not raise
+    instrumentor2.uninstrument()
+
+
+def test_instrumentor_uninstrument_without_instrument():
+    """Test uninstrument without prior instrument."""
+    instrumentor = ClaudeAgentSDKInstrumentor()
+    # Should not raise even if not instrumented
+    instrumentor.uninstrument()
+
+
+def test_usage_extraction_from_result_message_no_usage(instrument):
+    """Test usage extraction when result message has no usage."""
+    # Mock message without usage
+    mock_msg = Mock()
+    mock_msg.usage = None
+
+    result = extract_usage_from_result_message(mock_msg)
+    assert result == {}
+
+
+def test_usage_extraction_from_result_message_with_usage(instrument):
+    """Test usage extraction with valid usage data."""
+    # Mock message with usage
+    mock_msg = Mock()
+    mock_msg.usage = Mock()
+    mock_msg.usage.input_tokens = 100
+    mock_msg.usage.output_tokens = 50
+    mock_msg.usage.cache_read_input_tokens = 10
+    mock_msg.usage.cache_creation_input_tokens = 5
+
+    result = extract_usage_from_result_message(mock_msg)
+    # Cache tokens should be summed into input_tokens
+    assert result["input_tokens"] == 115  # 100 + 10 + 5
+    assert result["output_tokens"] == 50
+    # Only standard OpenTelemetry fields
+    assert "total_tokens" not in result
+    assert "cache_read_input_tokens" not in result
+
+
+def test_extract_usage_with_object_style_access(instrument):
+    """Test usage extraction with object attribute access."""
+    # Mock object with attributes
+    mock_usage = Mock()
+    mock_usage.input_tokens = 100
+    mock_usage.output_tokens = 50
+
+    result = extract_usage_metadata(mock_usage)
+    assert result["input_tokens"] == 100
+    assert result["output_tokens"] == 50
diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/test_unit.py b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/test_unit.py
new file mode 100644
index 000000000..70cc5b27a
--- /dev/null
+++ b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/test_unit.py
@@ -0,0 +1,175 @@
+"""Unit tests for Claude Agent SDK instrumentation without VCR."""
+
+import os
+
+from opentelemetry.instrumentation.claude_agent_sdk import (
+    ClaudeAgentSDKInstrumentor,
+)
+from opentelemetry.instrumentation.claude_agent_sdk.context import (
+    clear_parent_invocation,
+    get_parent_invocation,
+    set_parent_invocation,
+)
+from opentelemetry.instrumentation.claude_agent_sdk.utils import (
+    extract_usage_metadata,
+    infer_provider_from_base_url,
+    sum_anthropic_tokens,
+    truncate_value,
+)
+from opentelemetry.sdk.trace import TracerProvider
+
+
+def test_instrumentor_init():
+    """Test that instrumentor can be initialized."""
+    instrumentor = ClaudeAgentSDKInstrumentor()
+    assert instrumentor is not None
+
+
+def test_instrument_and_uninstrument():
+    """Test that instrumentation can be applied and removed."""
+    tracer_provider = TracerProvider()
+    instrumentor = ClaudeAgentSDKInstrumentor()
+
+    # Should not raise
+    instrumentor.instrument(tracer_provider=tracer_provider)
+
+    # Should not raise
+    instrumentor.uninstrument()
+
+
+def test_instrumentation_dependencies():
+    """Test that instrumentation dependencies are defined."""
+    instrumentor = ClaudeAgentSDKInstrumentor()
+    deps = instrumentor.instrumentation_dependencies()
+
+    assert deps is not None
+    assert len(deps) > 0
+    assert "claude-agent-sdk" in deps[0]
+
+
+def test_utils_safe_truncate():
+    """Test truncate_value utility function."""
+    # Test short string
+    result = truncate_value("hello")
+    assert result == "hello"
+
+    # Test long string
+    long_str = "a" * 200
+    result = truncate_value(long_str, max_length=150)
+    assert len(result) <= 153  # 150 + "..."
+    assert result.endswith("...")
+
+    # Test list
+    result = truncate_value([1, 2, 3])
+    assert "[" in result
+    assert "]" in result
+
+    # Test dict
+    result = truncate_value({"key": "value"})
+    assert "{" in result
+    assert "}" in result
+
+
+def test_context_operations():
+    """Test thread-local context operations."""
+    # Initially should be None
+    assert get_parent_invocation() is None
+
+    # Set a mock invocation
+    mock_invocation = {"test": "value"}
+    set_parent_invocation(mock_invocation)
+
+    # Should retrieve the same object
+    retrieved = get_parent_invocation()
+    assert retrieved == mock_invocation
+
+    # Clear should remove it
+    clear_parent_invocation()
+    assert get_parent_invocation() is None
+
+
+def test_usage_extraction():
+    """Test usage metadata extraction."""
+    # Test with dict
+    usage = {
+        "input_tokens": 100,
+        "output_tokens": 50,
+        "cache_read_input_tokens": 10,
+        "cache_creation_input_tokens": 5,
+    }
+
+    result = extract_usage_metadata(usage)
+    assert result["input_tokens"] == 100
+    assert result["output_tokens"] == 50
+    # Cache tokens are temporarily extracted for summing
+    assert result["cache_read_input_tokens"] == 10
+    assert result["cache_creation_input_tokens"] == 5
+
+
+def test_sum_anthropic_tokens():
+    """Test Anthropic token summation."""
+    usage = {
+        "input_tokens": 100,
+        "output_tokens": 50,
+        "cache_read_input_tokens": 10,
+        "cache_creation_input_tokens": 5,
+    }
+
+    result = sum_anthropic_tokens(usage)
+
+    # Should sum all input tokens
+    assert result["input_tokens"] == 115  # 100 + 10 + 5
+    assert result["output_tokens"] == 50
+    # Only standard OpenTelemetry fields in result
+    assert "cache_read_input_tokens" not in result
+    assert "cache_creation_input_tokens" not in result
+    assert "total_tokens" not in result
+
+
+def test_infer_provider_from_base_url():
+    """Test provider inference from ANTHROPIC_BASE_URL."""
+    # Save original env var
+    original_url = os.environ.get("ANTHROPIC_BASE_URL")
+
+    try:
+        # Test DashScope (extended provider)
+        os.environ["ANTHROPIC_BASE_URL"] = (
+            "https://dashscope.aliyuncs.com/apps/anthropic"
+        )
+        assert infer_provider_from_base_url() == "dashscope"
+
+        # Test aliyuncs (alternative check for dashscope)
+        result = infer_provider_from_base_url("https://api.aliyuncs.com/v1")
+        assert result == "dashscope"
+
+        # Test Moonshot (extended provider)
+        result = infer_provider_from_base_url("https://api.moonshot.cn/v1")
+        assert result == "moonshot"
+
+        # Test Anthropic (defaults to anthropic)
+        os.environ["ANTHROPIC_BASE_URL"] = "https://api.anthropic.com"
+        assert infer_provider_from_base_url() == "anthropic"
+
+        # Test ZhipuAI (defaults to anthropic)
+        os.environ["ANTHROPIC_BASE_URL"] = (
+            "https://open.bigmodel.cn/api/anthropic"
+        )
+        assert infer_provider_from_base_url() == "anthropic"
+
+        # Test custom/unknown provider (defaults to anthropic)
+        result = infer_provider_from_base_url(
+            "https://api.unknown-provider.com"
+        )
+        assert result == "anthropic"
+
+        # Test empty (defaults to anthropic)
+        if "ANTHROPIC_BASE_URL" in os.environ:
+            del os.environ["ANTHROPIC_BASE_URL"]
+        assert infer_provider_from_base_url() == "anthropic"
+
+    finally:
+        # Restore original env var
+        if original_url is not None:
+            os.environ["ANTHROPIC_BASE_URL"] = original_url
+        elif "ANTHROPIC_BASE_URL" in os.environ:
+            del os.environ["ANTHROPIC_BASE_URL"]
diff --git a/tox-loongsuite.ini b/tox-loongsuite.ini
index bafb6b6e7..771295468 100644
--- a/tox-loongsuite.ini
+++ b/tox-loongsuite.ini
@@ -20,6 +20,11 @@ envlist =
     py3{9,10,11,12,13}-test-loongsuite-instrumentation-dashscope-{oldest,latest}
     lint-loongsuite-instrumentation-dashscope
 
+    ; loongsuite-instrumentation-claude-agent-sdk
+    py3{9,10,11,12,13}-test-loongsuite-instrumentation-claude-agent-sdk-{oldest,latest}
+    pypy3-test-loongsuite-instrumentation-claude-agent-sdk-{oldest,latest}
+    lint-loongsuite-instrumentation-claude-agent-sdk
+
     ; ; loongsuite-instrumentation-agno
     ; py3{9,10,11,12,13}-test-loongsuite-instrumentation-agno
     ; lint-loongsuite-instrumentation-agno
@@ -61,6 +66,11 @@ deps =
   dashscope-latest: -r {toxinidir}/instrumentation-loongsuite/loongsuite-instrumentation-dashscope/tests/requirements.latest.txt
   lint-loongsuite-instrumentation-dashscope: -r {toxinidir}/instrumentation-loongsuite/loongsuite-instrumentation-dashscope/tests/requirements.oldest.txt
 
+  claude-agent-sdk-oldest: -r {toxinidir}/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/requirements.oldest.txt
+  claude-agent-sdk-latest: {[testenv]test_deps}
+  claude-agent-sdk-latest: -r {toxinidir}/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/requirements.latest.txt
+  lint-loongsuite-instrumentation-claude-agent-sdk: -r {toxinidir}/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/requirements.oldest.txt
+
   loongsuite-agno: {[testenv]test_deps}
   loongsuite-agno: -r {toxinidir}/instrumentation-loongsuite/loongsuite-instrumentation-agno/test-requirements.txt
 
@@ -102,6 +112,9 @@ commands =
   test-loongsuite-instrumentation-dashscope: pytest {toxinidir}/instrumentation-loongsuite/loongsuite-instrumentation-dashscope/tests {posargs}
   lint-loongsuite-instrumentation-dashscope: python -m ruff check {toxinidir}/instrumentation-loongsuite/loongsuite-instrumentation-dashscope
   
+  test-loongsuite-instrumentation-claude-agent-sdk: pytest {toxinidir}/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests {posargs}
+  lint-loongsuite-instrumentation-claude-agent-sdk: python -m ruff check {toxinidir}/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk
+  
   test-loongsuite-instrumentation-agno: pytest {toxinidir}/instrumentation-loongsuite/loongsuite-instrumentation-agno/tests {posargs}
   lint-loongsuite-instrumentation-agno: python -m ruff check {toxinidir}/instrumentation-loongsuite/loongsuite-instrumentation-agno
   

From 0f26754072fe383f6baefe98ac3482b1cc7e330e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E6=B5=81=E5=B1=BF?= <caishipeng.csp@alibaba-inc.com>
Date: Fri, 9 Jan 2026 10:47:48 +0800
Subject: [PATCH 02/23] [fix] Require Python >=3.10 and fix test deps

Change-Id: Ie6eab7ffae40e000b3b2c55a0abe50848490c1e7
Co-developed-by: Cursor <noreply@cursor.com>
---
 .../pyproject.toml                                       | 3 +--
 .../tests/requirements.latest.txt                        | 9 +++++----
 .../tests/requirements.oldest.txt                        | 1 +
 tox-loongsuite.ini                                       | 3 +--
 4 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/pyproject.toml b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/pyproject.toml
index 0271630ae..fc6fe3b2c 100644
--- a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/pyproject.toml
+++ b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/pyproject.toml
@@ -8,7 +8,7 @@ dynamic = ["version"]
 description = "LoongSuite Claude Agent SDK instrumentation"
 readme = "README.rst"
 license = "Apache-2.0"
-requires-python = ">=3.9"
+requires-python = ">=3.10"
 authors = [
   { name = "LoongSuite Python Agent Authors", email = "qp467389@alibaba-inc.com" },
 ]
@@ -18,7 +18,6 @@ classifiers = [
   "License :: OSI Approved :: Apache Software License",
   "Programming Language :: Python",
   "Programming Language :: Python :: 3",
-  "Programming Language :: Python :: 3.9",
   "Programming Language :: Python :: 3.10",
   "Programming Language :: Python :: 3.11",
   "Programming Language :: Python :: 3.12",
diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/requirements.latest.txt b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/requirements.latest.txt
index 6dd17a455..669932732 100644
--- a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/requirements.latest.txt
+++ b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/requirements.latest.txt
@@ -13,14 +13,15 @@
 # limitations under the License.
 
 # This variant of the requirements aims to test the system using
-# the latest available versions of external dependencies.
+# the newest supported version of external dependencies.
 
 claude-agent-sdk>=0.1.0
-pytest
-pytest-asyncio
-wrapt
+pytest==7.4.4
+pytest-asyncio==0.21.0
+wrapt==1.17.3
 opentelemetry-exporter-otlp-proto-http
 
+-e opentelemetry-instrumentation
 -e instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk
 -e util/opentelemetry-util-genai
 
diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/requirements.oldest.txt b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/requirements.oldest.txt
index e642ae450..93c3c2ebe 100644
--- a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/requirements.oldest.txt
+++ b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/requirements.oldest.txt
@@ -23,6 +23,7 @@ opentelemetry-exporter-otlp-proto-http~=1.30
 opentelemetry-api==1.37
 opentelemetry-sdk==1.37
 opentelemetry-semantic-conventions==0.58b0
+opentelemetry-instrumentation==0.58b0
 
 -e instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk
 -e util/opentelemetry-util-genai
diff --git a/tox-loongsuite.ini b/tox-loongsuite.ini
index 771295468..dca49b0c5 100644
--- a/tox-loongsuite.ini
+++ b/tox-loongsuite.ini
@@ -21,8 +21,7 @@ envlist =
     lint-loongsuite-instrumentation-dashscope
 
     ; loongsuite-instrumentation-claude-agent-sdk
-    py3{9,10,11,12,13}-test-loongsuite-instrumentation-claude-agent-sdk-{oldest,latest}
-    pypy3-test-loongsuite-instrumentation-claude-agent-sdk-{oldest,latest}
+    py3{10,11,12,13}-test-loongsuite-instrumentation-claude-agent-sdk-{oldest,latest}
     lint-loongsuite-instrumentation-claude-agent-sdk
 
     ; ; loongsuite-instrumentation-agno

From c9d1396c5252c386c2c71e7a3d069098c9f91ba9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E6=B5=81=E5=B1=BF?= <caishipeng.csp@alibaba-inc.com>
Date: Fri, 9 Jan 2026 10:54:14 +0800
Subject: [PATCH 03/23] [fix] Remove version constraint from
 opentelemetry-util-genai dependency

Change-Id: I2656979e57ed2e9b3110867f9e5f6321d45cb3e2
Co-developed-by: Cursor <noreply@cursor.com>
---
 .../loongsuite-instrumentation-claude-agent-sdk/pyproject.toml  | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/pyproject.toml b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/pyproject.toml
index fc6fe3b2c..81af0c9e8 100644
--- a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/pyproject.toml
+++ b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/pyproject.toml
@@ -29,7 +29,7 @@ dependencies = [
   "opentelemetry-semantic-conventions ~= 0.58b0",
   # Note: opentelemetry-util-genai should be installed from local source
   # for extended features (ExtendedTelemetryHandler)
-  "opentelemetry-util-genai >= 0.2b0",
+  "opentelemetry-util-genai",
 ]
 
 [project.optional-dependencies]

From 589ada29205d1bf826da283c9652de97d0f2ce53 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E6=B5=81=E5=B1=BF?= <caishipeng.csp@alibaba-inc.com>
Date: Fri, 9 Jan 2026 11:20:43 +0800
Subject: [PATCH 04/23] [fix] fix test failed

Change-Id: Ibf6f934583ed0e76a4f79016cbfbade94a05acec
Co-developed-by: Cursor <noreply@cursor.com>
---
 .github/workflows/loongsuite_lint_0.yml       |  19 +++
 .github/workflows/loongsuite_test_0.yml       | 152 ++++++++++++++++++
 .../genai/_multimodal_upload/pre_uploader.py  |   2 +-
 3 files changed, 172 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/loongsuite_lint_0.yml b/.github/workflows/loongsuite_lint_0.yml
index 3c71465c0..970b36109 100644
--- a/.github/workflows/loongsuite_lint_0.yml
+++ b/.github/workflows/loongsuite_lint_0.yml
@@ -70,6 +70,25 @@ jobs:
       - name: Run tests
         run: tox -c tox-loongsuite.ini -e lint-loongsuite-instrumentation-dashscope
 
+  lint-loongsuite-instrumentation-claude-agent-sdk:
+    name: LoongSuite loongsuite-instrumentation-claude-agent-sdk
+    runs-on: ubuntu-latest
+    timeout-minutes: 30
+    steps:
+      - name: Checkout repo @ SHA - ${{ github.sha }}
+        uses: actions/checkout@v4
+
+      - name: Set up Python 3.13
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.13"
+
+      - name: Install tox
+        run: pip install tox-uv
+
+      - name: Run tests
+        run: tox -c tox-loongsuite.ini -e lint-loongsuite-instrumentation-claude-agent-sdk
+
   lint-loongsuite-instrumentation-mem0:
     name: LoongSuite loongsuite-instrumentation-mem0
     runs-on: ubuntu-latest
diff --git a/.github/workflows/loongsuite_test_0.yml b/.github/workflows/loongsuite_test_0.yml
index 36c280c0f..17cfd4013 100644
--- a/.github/workflows/loongsuite_test_0.yml
+++ b/.github/workflows/loongsuite_test_0.yml
@@ -374,6 +374,158 @@ jobs:
       - name: Run tests
         run: tox -c tox-loongsuite.ini -e py313-test-loongsuite-instrumentation-dashscope-latest -- -ra
 
+  py310-test-loongsuite-instrumentation-claude-agent-sdk-oldest_ubuntu-latest:
+    name: LoongSuite loongsuite-instrumentation-claude-agent-sdk-oldest 3.10 Ubuntu
+    runs-on: ubuntu-latest
+    timeout-minutes: 30
+    steps:
+      - name: Checkout repo @ SHA - ${{ github.sha }}
+        uses: actions/checkout@v4
+
+      - name: Set up Python 3.10
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.10"
+
+      - name: Install tox
+        run: pip install tox-uv
+
+      - name: Run tests
+        run: tox -c tox-loongsuite.ini -e py310-test-loongsuite-instrumentation-claude-agent-sdk-oldest -- -ra
+
+  py310-test-loongsuite-instrumentation-claude-agent-sdk-latest_ubuntu-latest:
+    name: LoongSuite loongsuite-instrumentation-claude-agent-sdk-latest 3.10 Ubuntu
+    runs-on: ubuntu-latest
+    timeout-minutes: 30
+    steps:
+      - name: Checkout repo @ SHA - ${{ github.sha }}
+        uses: actions/checkout@v4
+
+      - name: Set up Python 3.10
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.10"
+
+      - name: Install tox
+        run: pip install tox-uv
+
+      - name: Run tests
+        run: tox -c tox-loongsuite.ini -e py310-test-loongsuite-instrumentation-claude-agent-sdk-latest -- -ra
+
+  py311-test-loongsuite-instrumentation-claude-agent-sdk-oldest_ubuntu-latest:
+    name: LoongSuite loongsuite-instrumentation-claude-agent-sdk-oldest 3.11 Ubuntu
+    runs-on: ubuntu-latest
+    timeout-minutes: 30
+    steps:
+      - name: Checkout repo @ SHA - ${{ github.sha }}
+        uses: actions/checkout@v4
+
+      - name: Set up Python 3.11
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"
+
+      - name: Install tox
+        run: pip install tox-uv
+
+      - name: Run tests
+        run: tox -c tox-loongsuite.ini -e py311-test-loongsuite-instrumentation-claude-agent-sdk-oldest -- -ra
+
+  py311-test-loongsuite-instrumentation-claude-agent-sdk-latest_ubuntu-latest:
+    name: LoongSuite loongsuite-instrumentation-claude-agent-sdk-latest 3.11 Ubuntu
+    runs-on: ubuntu-latest
+    timeout-minutes: 30
+    steps:
+      - name: Checkout repo @ SHA - ${{ github.sha }}
+        uses: actions/checkout@v4
+
+      - name: Set up Python 3.11
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"
+
+      - name: Install tox
+        run: pip install tox-uv
+
+      - name: Run tests
+        run: tox -c tox-loongsuite.ini -e py311-test-loongsuite-instrumentation-claude-agent-sdk-latest -- -ra
+
+  py312-test-loongsuite-instrumentation-claude-agent-sdk-oldest_ubuntu-latest:
+    name: LoongSuite loongsuite-instrumentation-claude-agent-sdk-oldest 3.12 Ubuntu
+    runs-on: ubuntu-latest
+    timeout-minutes: 30
+    steps:
+      - name: Checkout repo @ SHA - ${{ github.sha }}
+        uses: actions/checkout@v4
+
+      - name: Set up Python 3.12
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.12"
+
+      - name: Install tox
+        run: pip install tox-uv
+
+      - name: Run tests
+        run: tox -c tox-loongsuite.ini -e py312-test-loongsuite-instrumentation-claude-agent-sdk-oldest -- -ra
+
+  py312-test-loongsuite-instrumentation-claude-agent-sdk-latest_ubuntu-latest:
+    name: LoongSuite loongsuite-instrumentation-claude-agent-sdk-latest 3.12 Ubuntu
+    runs-on: ubuntu-latest
+    timeout-minutes: 30
+    steps:
+      - name: Checkout repo @ SHA - ${{ github.sha }}
+        uses: actions/checkout@v4
+
+      - name: Set up Python 3.12
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.12"
+
+      - name: Install tox
+        run: pip install tox-uv
+
+      - name: Run tests
+        run: tox -c tox-loongsuite.ini -e py312-test-loongsuite-instrumentation-claude-agent-sdk-latest -- -ra
+
+  py313-test-loongsuite-instrumentation-claude-agent-sdk-oldest_ubuntu-latest:
+    name: LoongSuite loongsuite-instrumentation-claude-agent-sdk-oldest 3.13 Ubuntu
+    runs-on: ubuntu-latest
+    timeout-minutes: 30
+    steps:
+      - name: Checkout repo @ SHA - ${{ github.sha }}
+        uses: actions/checkout@v4
+
+      - name: Set up Python 3.13
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.13"
+
+      - name: Install tox
+        run: pip install tox-uv
+
+      - name: Run tests
+        run: tox -c tox-loongsuite.ini -e py313-test-loongsuite-instrumentation-claude-agent-sdk-oldest -- -ra
+
+  py313-test-loongsuite-instrumentation-claude-agent-sdk-latest_ubuntu-latest:
+    name: LoongSuite loongsuite-instrumentation-claude-agent-sdk-latest 3.13 Ubuntu
+    runs-on: ubuntu-latest
+    timeout-minutes: 30
+    steps:
+      - name: Checkout repo @ SHA - ${{ github.sha }}
+        uses: actions/checkout@v4
+
+      - name: Set up Python 3.13
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.13"
+
+      - name: Install tox
+        run: pip install tox-uv
+
+      - name: Run tests
+        run: tox -c tox-loongsuite.ini -e py313-test-loongsuite-instrumentation-claude-agent-sdk-latest -- -ra
+
   py310-test-loongsuite-instrumentation-mem0-oldest_ubuntu-latest:
     name: LoongSuite loongsuite-instrumentation-mem0-oldest 3.10 Ubuntu
     runs-on: ubuntu-latest
diff --git a/util/opentelemetry-util-genai/src/opentelemetry/util/genai/_multimodal_upload/pre_uploader.py b/util/opentelemetry-util-genai/src/opentelemetry/util/genai/_multimodal_upload/pre_uploader.py
index 9259db8c8..7e403c39c 100644
--- a/util/opentelemetry-util-genai/src/opentelemetry/util/genai/_multimodal_upload/pre_uploader.py
+++ b/util/opentelemetry-util-genai/src/opentelemetry/util/genai/_multimodal_upload/pre_uploader.py
@@ -52,7 +52,7 @@
 
 # Try importing audio processing libraries (optional dependencies)
 try:
-    import numpy as np
+    import numpy as np  # pyright: ignore[reportMissingImports]
     import soundfile as sf  # pyright: ignore[reportMissingImports]
 
     _audio_libs_available = True

From 9fbf95466d0f0ef51aed629b2fd263c9611af0b5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E6=B5=81=E5=B1=BF?= <caishipeng.csp@alibaba-inc.com>
Date: Fri, 9 Jan 2026 11:34:56 +0800
Subject: [PATCH 05/23] [fix] fix test failed

Change-Id: I1420318408a53e563499c43a3cc2ae86ed0aa929
Co-developed-by: Cursor <noreply@cursor.com>
---
 .../pyproject.toml                            |  5 ++++
 .../tests/conftest.py                         | 23 +++++++++++++++++++
 .../tests/test_attributes.py                  |  8 +++++++
 .../tests/test_mocks.py                       |  1 +
 4 files changed, 37 insertions(+)

diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/pyproject.toml b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/pyproject.toml
index 81af0c9e8..aefdbdf04 100644
--- a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/pyproject.toml
+++ b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/pyproject.toml
@@ -55,3 +55,8 @@ include = [
 [tool.hatch.build.targets.wheel]
 packages = ["src/opentelemetry"]
 
+[tool.pytest.ini_options]
+markers = [
+  "requires_cli: marks tests that require Claude CLI executable (skipped in CI)",
+]
+
diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/conftest.py b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/conftest.py
index b7b42259d..f25b641c1 100644
--- a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/conftest.py
+++ b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/conftest.py
@@ -22,6 +22,29 @@
     "OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT", "SPAN_ONLY"
 )
 
+
+def pytest_configure(config):
+    """Register custom markers."""
+    config.addinivalue_line(
+        "markers",
+        "requires_cli: mark test as requiring Claude CLI executable (skipped in CI)",
+    )
+
+
+def pytest_collection_modifyitems(config, items):
+    """Skip tests marked with 'requires_cli' if ANTHROPIC_API_KEY is not set or is mock."""
+    # Check if we have a real API key (not the test mock)
+    api_key = os.environ.get("ANTHROPIC_API_KEY", "")
+    has_real_api = api_key and api_key != "test_anthropic_api_key"
+
+    skip_cli = pytest.mark.skip(
+        reason="Requires real ANTHROPIC_API_KEY and Claude CLI (not available in CI)"
+    )
+
+    for item in items:
+        if "requires_cli" in item.keywords and not has_real_api:
+            item.add_marker(skip_cli)
+
 from opentelemetry.instrumentation._semconv import (
     OTEL_SEMCONV_STABILITY_OPT_IN,
     _OpenTelemetrySemanticConventionStability,
diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/test_attributes.py b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/test_attributes.py
index a5aaecace..11c297334 100644
--- a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/test_attributes.py
+++ b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/test_attributes.py
@@ -17,6 +17,7 @@
 )
 
 
+@pytest.mark.requires_cli
 @pytest.mark.asyncio
 async def test_span_attributes_semantic_conventions(instrument, span_exporter):
     """Test that all spans follow semantic conventions."""
@@ -52,6 +53,7 @@ async def test_span_attributes_semantic_conventions(instrument, span_exporter):
                 assert GenAIAttributes.GEN_AI_REQUEST_MODEL in span.attributes
 
 
+@pytest.mark.requires_cli
 @pytest.mark.asyncio
 async def test_agent_span_naming_convention(instrument, span_exporter):
     """Test agent span naming follows conventions."""
@@ -78,6 +80,7 @@ async def test_agent_span_naming_convention(instrument, span_exporter):
     )
 
 
+@pytest.mark.requires_cli
 @pytest.mark.asyncio
 async def test_llm_span_naming_convention(instrument, span_exporter):
     """Test LLM span naming follows conventions."""
@@ -107,6 +110,7 @@ async def test_llm_span_naming_convention(instrument, span_exporter):
     assert "qwen" in llm_span.name.lower() or "qwen-plus" in llm_span.name
 
 
+@pytest.mark.requires_cli
 @pytest.mark.asyncio
 async def test_tool_span_naming_convention(instrument, span_exporter):
     """Test tool span naming follows conventions."""
@@ -133,6 +137,7 @@ async def test_tool_span_naming_convention(instrument, span_exporter):
         assert "execute_tool" in tool_span.name
 
 
+@pytest.mark.requires_cli
 @pytest.mark.asyncio
 async def test_span_context_propagation(instrument, span_exporter):
     """Test that span context is properly propagated."""
@@ -164,6 +169,7 @@ async def test_span_context_propagation(instrument, span_exporter):
             assert span.parent.span_id == agent_span_id
 
 
+@pytest.mark.requires_cli
 @pytest.mark.asyncio
 async def test_token_usage_attributes(instrument, span_exporter):
     """Test that token usage attributes are captured."""
@@ -242,6 +248,7 @@ def test_instrumentor_class_exported():
     assert hasattr(claude_agent_sdk, "__version__")
 
 
+@pytest.mark.requires_cli
 @pytest.mark.asyncio
 async def test_multiple_concurrent_queries(instrument, span_exporter):
     """Test that multiple concurrent queries are handled correctly."""
@@ -271,6 +278,7 @@ async def run_query(prompt):
     assert len(agent_spans) >= 2
 
 
+@pytest.mark.requires_cli
 @pytest.mark.asyncio
 async def test_span_attributes_no_sensitive_data(
     instrument_no_content, span_exporter
diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/test_mocks.py b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/test_mocks.py
index 4cd9fe073..62f37f31d 100644
--- a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/test_mocks.py
+++ b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/test_mocks.py
@@ -24,6 +24,7 @@
 )
 
 
+@pytest.mark.requires_cli
 @pytest.mark.asyncio
 async def test_agent_span_attributes_complete(instrument, span_exporter):
     """Test that agent span has all required attributes."""

From 462b1a0f8665a770b3d2c350ca787c251a43a2d7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E6=B5=81=E5=B1=BF?= <caishipeng.csp@alibaba-inc.com>
Date: Fri, 9 Jan 2026 11:39:31 +0800
Subject: [PATCH 06/23] [fix] fix ruff failed

Change-Id: Iab09e483df20ef8ad44545e36d0d83f5ae0cae1d
Co-developed-by: Cursor <noreply@cursor.com>
---
 .../tests/conftest.py                         | 32 +++++++++----------
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/conftest.py b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/conftest.py
index f25b641c1..e78f54692 100644
--- a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/conftest.py
+++ b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/conftest.py
@@ -22,6 +22,22 @@
     "OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT", "SPAN_ONLY"
 )
 
+from opentelemetry.instrumentation._semconv import (
+    OTEL_SEMCONV_STABILITY_OPT_IN,
+    _OpenTelemetrySemanticConventionStability,
+)
+from opentelemetry.instrumentation.claude_agent_sdk import (
+    ClaudeAgentSDKInstrumentor,
+)
+from opentelemetry.sdk.trace import TracerProvider
+from opentelemetry.sdk.trace.export import SimpleSpanProcessor
+from opentelemetry.sdk.trace.export.in_memory_span_exporter import (
+    InMemorySpanExporter,
+)
+from opentelemetry.util.genai.environment_variables import (
+    OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT,
+)
+
 
 def pytest_configure(config):
     """Register custom markers."""
@@ -45,22 +61,6 @@ def pytest_collection_modifyitems(config, items):
         if "requires_cli" in item.keywords and not has_real_api:
             item.add_marker(skip_cli)
 
-from opentelemetry.instrumentation._semconv import (
-    OTEL_SEMCONV_STABILITY_OPT_IN,
-    _OpenTelemetrySemanticConventionStability,
-)
-from opentelemetry.instrumentation.claude_agent_sdk import (
-    ClaudeAgentSDKInstrumentor,
-)
-from opentelemetry.sdk.trace import TracerProvider
-from opentelemetry.sdk.trace.export import SimpleSpanProcessor
-from opentelemetry.sdk.trace.export.in_memory_span_exporter import (
-    InMemorySpanExporter,
-)
-from opentelemetry.util.genai.environment_variables import (
-    OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT,
-)
-
 
 @pytest.fixture(scope="function", name="span_exporter")
 def fixture_span_exporter():

From 5f317f9008b68829cc42691f42a293b4607fe705 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E6=B5=81=E5=B1=BF?= <caishipeng.csp@alibaba-inc.com>
Date: Fri, 9 Jan 2026 14:06:42 +0800
Subject: [PATCH 07/23] [fix]:fix llm span time record error

Change-Id: I55134f7e3ef30b7192deab801b12b132b250a31c
Co-developed-by: Cursor <noreply@cursor.com>
---
 .../instrumentation/claude_agent_sdk/patch.py | 79 ++++++++++++-------
 1 file changed, 50 insertions(+), 29 deletions(-)

diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/instrumentation/claude_agent_sdk/patch.py b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/instrumentation/claude_agent_sdk/patch.py
index 5221a7073..f1a697784 100644
--- a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/instrumentation/claude_agent_sdk/patch.py
+++ b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/instrumentation/claude_agent_sdk/patch.py
@@ -186,43 +186,52 @@ def _process_assistant_message(
     subagent_sessions: Optional[Dict[str, InvokeAgentInvocation]] = None,
 ) -> None:
     """Process AssistantMessage: create LLM turn, extract parts, create tool spans."""
-    # Start a new LLM turn (closes previous one if exists)
-    turn_tracker.start_llm_turn(
-        msg,
-        model,
-        prompt,
-        collected_messages,
-        provider=infer_provider_from_base_url(),
-    )
-
-    # Extract message parts
     parts = _extract_message_parts(msg)
-
-    # Add output to LLM turn and Agent invocation
-    if parts:
-        turn_tracker.add_assistant_output(parts)
-        output_msg = OutputMessage(
-            role="assistant", parts=parts, finish_reason="stop"
+    has_text_content = any(isinstance(p, Text) for p in parts)
+
+    if has_text_content:
+        # This is the start of a new LLM response (with text content)
+        message_arrival_time = time.time()
+
+        turn_tracker.start_llm_turn(
+            msg,
+            model,
+            prompt,
+            collected_messages,
+            provider=infer_provider_from_base_url(),
+            message_arrival_time=message_arrival_time,
         )
-        agent_invocation.output_messages.append(output_msg)
 
-        # Collect assistant message for next turn's input
-        text_parts = [p.content for p in parts if isinstance(p, Text)]
-        if text_parts:
-            collected_messages.append(
-                {"role": "assistant", "content": " ".join(text_parts)}
+        if parts:
+            turn_tracker.add_assistant_output(parts)
+            output_msg = OutputMessage(
+                role="assistant", parts=parts, finish_reason="stop"
             )
+            agent_invocation.output_messages.append(output_msg)
 
-    # Close LLM span before creating tool spans (ensures tool spans are siblings of LLM spans)
-    turn_tracker.close_llm_turn()
+            text_parts = [p.content for p in parts if isinstance(p, Text)]
+            if text_parts:
+                collected_messages.append(
+                    {"role": "assistant", "content": " ".join(text_parts)}
+                )
+
+    else:
+        # This is a tool-only message, part of the current LLM turn
+        # Append it to the current LLM invocation's output
+        if parts and turn_tracker.current_llm_invocation:
+            turn_tracker.add_assistant_output(parts)
+            output_msg = OutputMessage(
+                role="assistant", parts=parts, finish_reason="stop"
+            )
+            agent_invocation.output_messages.append(output_msg)
+
+        turn_tracker.close_llm_turn()
 
-    # Process Task subagents if enabled
     if process_subagents and subagent_sessions is not None:
         _handle_task_subagents(
             msg, agent_invocation, subagent_sessions, handler
         )
 
-    # Create tool spans (exclude Task if processing subagents)
     exclude_tools = ["Task"] if process_subagents else []
     _create_tool_spans_from_message(
         msg, handler, exclude_tool_names=exclude_tools
@@ -241,7 +250,9 @@ def _process_user_message(
     if user_text_parts:
         user_content = " ".join(user_text_parts)
         collected_messages.append({"role": "user", "content": user_content})
-        turn_tracker.mark_next_llm_start()
+
+    # Always mark next LLM start when UserMessage arrives
+    turn_tracker.mark_next_llm_start()
 
 
 def _process_result_message(
@@ -280,9 +291,19 @@ def start_llm_turn(
         prompt: str,
         collected_messages: List[Dict[str, Any]],
         provider: str = "anthropic",
+        message_arrival_time: Optional[float] = None,
     ) -> Optional[LLMInvocation]:
-        """Start a new LLM invocation span with pre-recorded start time."""
-        start_time = self.next_llm_start_time or time.time()
+        """Start a new LLM invocation span with pre-recorded start time.
+
+        Args:
+            message_arrival_time: The time when the AssistantMessage arrived.
+                If next_llm_start_time is set (from previous UserMessage), use that.
+                Otherwise, use message_arrival_time or fall back to current time.
+        """
+        # Priority: next_llm_start_time > message_arrival_time > current time
+        start_time = (
+            self.next_llm_start_time or message_arrival_time or time.time()
+        )
 
         if self.current_llm_invocation:
             self.handler.stop_llm(self.current_llm_invocation)

From 96e375f27c5342475aedcc18bc212c7f7d808c3f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E6=B5=81=E5=B1=BF?= <caishipeng.csp@alibaba-inc.com>
Date: Fri, 9 Jan 2026 14:42:22 +0800
Subject: [PATCH 08/23] [fix]:fix agent context error

Change-Id: I44bf3572fa7d8bf7d47b665d3463433a93076f9c
Co-developed-by: Cursor <noreply@cursor.com>
---
 .../opentelemetry/instrumentation/claude_agent_sdk/patch.py  | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/instrumentation/claude_agent_sdk/patch.py b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/instrumentation/claude_agent_sdk/patch.py
index f1a697784..705cf4b04 100644
--- a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/instrumentation/claude_agent_sdk/patch.py
+++ b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/instrumentation/claude_agent_sdk/patch.py
@@ -21,6 +21,7 @@
 from claude_agent_sdk import HookMatcher
 from claude_agent_sdk.types import ClaudeAgentOptions
 
+from opentelemetry import context as otel_context
 from opentelemetry.instrumentation.claude_agent_sdk.context import (
     clear_parent_invocation,
     set_parent_invocation,
@@ -548,6 +549,8 @@ async def wrap_claude_client_receive_response(
         else [],
     )
 
+    # Clear context to create a new root trace for each independent query
+    otel_context.attach(otel_context.Context())
     handler.start_invoke_agent(agent_invocation)
     set_parent_invocation(agent_invocation)
 
@@ -647,6 +650,8 @@ async def wrap_query(wrapped, instance, args, kwargs, handler=None):
         else [],
     )
 
+    # Clear context to create a new root trace for each independent query
+    otel_context.attach(otel_context.Context())
     handler.start_invoke_agent(agent_invocation)
     set_parent_invocation(agent_invocation)
 

From cb87246a910dfb1069737111da075d51cf60ed94 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E6=B5=81=E5=B1=BF?= <caishipeng.csp@alibaba-inc.com>
Date: Mon, 12 Jan 2026 01:40:34 +0800
Subject: [PATCH 09/23] [fix] fix handler reset and model fallback

Change-Id: I4308f8d8ecc7048c1bc9d1d9ec824addad1c5912
Co-developed-by: Cursor <noreply@cursor.com>
---
 .../claude_agent_sdk/__init__.py              |  5 +++--
 .../instrumentation/claude_agent_sdk/patch.py |  6 ++----
 .../instrumentation/claude_agent_sdk/utils.py | 21 +++++++++++++++++++
 3 files changed, 26 insertions(+), 6 deletions(-)

diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/instrumentation/claude_agent_sdk/__init__.py b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/instrumentation/claude_agent_sdk/__init__.py
index e6b6be224..1c8e23f47 100644
--- a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/instrumentation/claude_agent_sdk/__init__.py
+++ b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/instrumentation/claude_agent_sdk/__init__.py
@@ -70,9 +70,10 @@ class ClaudeAgentSDKInstrumentor(BaseInstrumentor):
     Instrumentor for Claude Agent SDK.
     """
 
+    _handler: Optional[ExtendedTelemetryHandler] = None
+
     def __init__(self):
         super().__init__()
-        self._handler: Optional[ExtendedTelemetryHandler] = None
 
     def instrumentation_dependencies(self) -> Collection[str]:
         """Return the dependencies required for this instrumentation."""
@@ -173,7 +174,7 @@ def _uninstrument(self, **kwargs: Any) -> None:
         except Exception as e:
             logger.warning(f"Failed to uninstrument Claude Agent SDK: {e}")
 
-        self._handler = None
+        ClaudeAgentSDKInstrumentor._handler = None
 
 
 __all__ = [
diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/instrumentation/claude_agent_sdk/patch.py b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/instrumentation/claude_agent_sdk/patch.py
index 705cf4b04..172fd24dd 100644
--- a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/instrumentation/claude_agent_sdk/patch.py
+++ b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/instrumentation/claude_agent_sdk/patch.py
@@ -34,6 +34,7 @@
 )
 from opentelemetry.instrumentation.claude_agent_sdk.utils import (
     extract_usage_from_result_message,
+    get_model_from_options_or_env,
     infer_provider_from_base_url,
 )
 from opentelemetry.util.genai.extended_handler import ExtendedTelemetryHandler
@@ -418,7 +419,6 @@ def _inject_tracing_hooks(options: Any) -> None:
 
         options.hooks["PreToolUse"].insert(0, otel_pre_matcher)
         options.hooks["PostToolUse"].insert(0, otel_post_matcher)
-        logger.warning("Failed to import HookMatcher from claude_agent_sdk")
     except Exception as e:
         logger.warning(f"Failed to inject tracing hooks: {e}")
 
@@ -633,9 +633,7 @@ async def wrap_query(wrapped, instance, args, kwargs, handler=None):
         except Exception as e:
             logger.warning(f"Failed to create ClaudeAgentOptions: {e}")
 
-    model = "unknown"
-    if options:
-        model = getattr(options, "model", "unknown")
+    model = get_model_from_options_or_env(options)
 
     prompt_str = str(prompt) if isinstance(prompt, str) else ""
     agent_invocation = InvokeAgentInvocation(
diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/instrumentation/claude_agent_sdk/utils.py b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/instrumentation/claude_agent_sdk/utils.py
index 29c221f98..a66683995 100644
--- a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/instrumentation/claude_agent_sdk/utils.py
+++ b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/instrumentation/claude_agent_sdk/utils.py
@@ -30,6 +30,27 @@
 logger = logging.getLogger(__name__)
 
 
+def get_model_from_options_or_env(options: Any) -> str:
+    """
+    Get model name from options or environment variables.
+    """
+    model = "unknown"
+
+    if options:
+        model = getattr(options, "model", None)
+
+        # Key: If options.model is None, read from environment variables
+        # This mimics Claude CLI behavior: when no --model parameter, CLI reads environment variables
+        if not model:
+            model = (
+                os.getenv("ANTHROPIC_MODEL")
+                or os.getenv("ANTHROPIC_SMALL_FAST_MODEL")
+                or "unknown"
+            )
+
+    return model
+
+
 def infer_provider_from_base_url(base_url: Optional[str] = None) -> str:
     """
     Infer the provider name from ANTHROPIC_BASE_URL environment variable.

From 9f42fcc04b10c87bb195eee0d04d406c37899ed3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E6=B5=81=E5=B1=BF?= <caishipeng.csp@alibaba-inc.com>
Date: Mon, 12 Jan 2026 13:56:40 +0800
Subject: [PATCH 10/23] [fix] fix tool call status

Change-Id: Ice677164663bc066e830b3cd24c09697d2dd0fb1
Co-developed-by: Cursor <noreply@cursor.com>
---
 .../instrumentation/claude_agent_sdk/hooks.py             | 8 ++++----
 .../instrumentation/claude_agent_sdk/patch.py             | 3 ++-
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/instrumentation/claude_agent_sdk/hooks.py b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/instrumentation/claude_agent_sdk/hooks.py
index d32aeb3aa..a3d73ea1f 100644
--- a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/instrumentation/claude_agent_sdk/hooks.py
+++ b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/instrumentation/claude_agent_sdk/hooks.py
@@ -158,10 +158,10 @@ async def post_tool_use_hook(
             # Set response (will be auto-formatted to gen_ai.tool.call.result by telemetry handler)
             client_invocation.tool_call_result = tool_response
 
-            # Check if error
             is_error = False
             if isinstance(tool_response, dict):
-                is_error = tool_response.get("is_error", False)
+                is_error_value = tool_response.get("is_error")
+                is_error = is_error_value is True
 
             if is_error:
                 error_msg = (
@@ -192,10 +192,10 @@ async def post_tool_use_hook(
         # Set response (will be auto-formatted to gen_ai.tool.call.result by telemetry handler)
         tool_invocation.tool_call_result = tool_response
 
-        # Check if error
         is_error = False
         if isinstance(tool_response, dict):
-            is_error = tool_response.get("is_error", False)
+            is_error_value = tool_response.get("is_error")
+            is_error = is_error_value is True
 
         if is_error:
             error_msg = (
diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/instrumentation/claude_agent_sdk/patch.py b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/instrumentation/claude_agent_sdk/patch.py
index 172fd24dd..b10e97f05 100644
--- a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/instrumentation/claude_agent_sdk/patch.py
+++ b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/instrumentation/claude_agent_sdk/patch.py
@@ -130,7 +130,8 @@ def _close_tool_spans_from_message(
 
                 # Set tool response
                 tool_content = getattr(block, "content", None)
-                is_error = getattr(block, "is_error", False)
+                is_error_value = getattr(block, "is_error", None)
+                is_error = is_error_value is True
 
                 tool_invocation.tool_call_result = tool_content
 

From c38bdcd31e0a06190f298b25c4f0ce9507b7137b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E6=B5=81=E5=B1=BF?= <caishipeng.csp@alibaba-inc.com>
Date: Mon, 12 Jan 2026 14:16:42 +0800
Subject: [PATCH 11/23] [fix] fix test error

Change-Id: Ieadc2e7ea2193e301c171c62eee9da3179c95199
Co-developed-by: Cursor <noreply@cursor.com>
---
 .../tests/test_attributes.py                                     | 1 +
 1 file changed, 1 insertion(+)

diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/test_attributes.py b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/test_attributes.py
index 11c297334..af1d5e1bc 100644
--- a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/test_attributes.py
+++ b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/test_attributes.py
@@ -100,6 +100,7 @@ async def test_llm_span_naming_convention(instrument, span_exporter):
         s
         for s in spans
         if GenAIAttributes.GEN_AI_OPERATION_NAME in s.attributes
+        and s.attributes[GenAIAttributes.GEN_AI_OPERATION_NAME] == "chat"
     ]
 
     assert len(llm_spans) >= 1

From 3c6484424e179ca9f52956f6d35aef6970e450ba Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E6=B5=81=E5=B1=BF?= <caishipeng.csp@alibaba-inc.com>
Date: Tue, 13 Jan 2026 10:34:56 +0800
Subject: [PATCH 12/23] [fix] fix test error

Change-Id: I4464ce9be8aab760c5b6a497e6b5bfd865d927ed
Co-developed-by: Cursor <noreply@cursor.com>
---
 .../instrumentation/claude_agent_sdk/__init__.py       | 10 +++++-----
 .../tests/conftest.py                                  |  2 +-
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/instrumentation/claude_agent_sdk/__init__.py b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/instrumentation/claude_agent_sdk/__init__.py
index 1c8e23f47..85d5a8200 100644
--- a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/instrumentation/claude_agent_sdk/__init__.py
+++ b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/instrumentation/claude_agent_sdk/__init__.py
@@ -93,7 +93,7 @@ def _instrument(self, **kwargs: Any) -> None:
         logger_provider = kwargs.get("logger_provider")
 
         # Create ExtendedTelemetryHandler
-        self._handler = ExtendedTelemetryHandler(
+        ClaudeAgentSDKInstrumentor._handler = ExtendedTelemetryHandler(
             tracer_provider=tracer_provider,
             meter_provider=meter_provider,
             logger_provider=logger_provider,
@@ -108,7 +108,7 @@ def _instrument(self, **kwargs: Any) -> None:
                 instance,
                 args,
                 kwargs: wrap_claude_client_init(
-                    wrapped, instance, args, kwargs, handler=self._handler
+                    wrapped, instance, args, kwargs, handler=ClaudeAgentSDKInstrumentor._handler
                 ),
             )
         except Exception as e:
@@ -125,7 +125,7 @@ def _instrument(self, **kwargs: Any) -> None:
                 instance,
                 args,
                 kwargs: wrap_claude_client_query(
-                    wrapped, instance, args, kwargs, handler=self._handler
+                    wrapped, instance, args, kwargs, handler=ClaudeAgentSDKInstrumentor._handler
                 ),
             )
         except Exception as e:
@@ -140,7 +140,7 @@ def _instrument(self, **kwargs: Any) -> None:
                 instance,
                 args,
                 kwargs: wrap_claude_client_receive_response(
-                    wrapped, instance, args, kwargs, handler=self._handler
+                    wrapped, instance, args, kwargs, handler=ClaudeAgentSDKInstrumentor._handler
                 ),
             )
         except Exception as e:
@@ -154,7 +154,7 @@ def _instrument(self, **kwargs: Any) -> None:
                 module="claude_agent_sdk",
                 name="query",
                 wrapper=lambda wrapped, instance, args, kwargs: wrap_query(
-                    wrapped, instance, args, kwargs, handler=self._handler
+                    wrapped, instance, args, kwargs, handler=ClaudeAgentSDKInstrumentor._handler
                 ),
             )
         except Exception as e:
diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/conftest.py b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/conftest.py
index e78f54692..02ecf21f2 100644
--- a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/conftest.py
+++ b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/conftest.py
@@ -97,7 +97,7 @@ def instrument_no_content(tracer_provider):
     os.environ.update(
         {
             OTEL_SEMCONV_STABILITY_OPT_IN: "gen_ai_latest_experimental",
-            OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT: "NO_CONTENT",
+            OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT: "false",
         }
     )
 

From 559916d4c69da049947f5c385427483bc957e1d8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E6=B5=81=E5=B1=BF?= <caishipeng.csp@alibaba-inc.com>
Date: Mon, 19 Jan 2026 15:44:56 +0800
Subject: [PATCH 13/23] [fix] fix test error

Change-Id: I39681d6a7c105b1653d75c7f8397a30bc94b785a
Co-developed-by: Cursor <noreply@cursor.com>
---
 .../claude_agent_sdk/__init__.py              |  24 +-
 .../instrumentation/claude_agent_sdk/patch.py | 319 ++++++------------
 2 files changed, 120 insertions(+), 223 deletions(-)

diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/instrumentation/claude_agent_sdk/__init__.py b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/instrumentation/claude_agent_sdk/__init__.py
index 85d5a8200..591f911c6 100644
--- a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/instrumentation/claude_agent_sdk/__init__.py
+++ b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/instrumentation/claude_agent_sdk/__init__.py
@@ -108,7 +108,11 @@ def _instrument(self, **kwargs: Any) -> None:
                 instance,
                 args,
                 kwargs: wrap_claude_client_init(
-                    wrapped, instance, args, kwargs, handler=ClaudeAgentSDKInstrumentor._handler
+                    wrapped,
+                    instance,
+                    args,
+                    kwargs,
+                    handler=ClaudeAgentSDKInstrumentor._handler,
                 ),
             )
         except Exception as e:
@@ -125,7 +129,11 @@ def _instrument(self, **kwargs: Any) -> None:
                 instance,
                 args,
                 kwargs: wrap_claude_client_query(
-                    wrapped, instance, args, kwargs, handler=ClaudeAgentSDKInstrumentor._handler
+                    wrapped,
+                    instance,
+                    args,
+                    kwargs,
+                    handler=ClaudeAgentSDKInstrumentor._handler,
                 ),
             )
         except Exception as e:
@@ -140,7 +148,11 @@ def _instrument(self, **kwargs: Any) -> None:
                 instance,
                 args,
                 kwargs: wrap_claude_client_receive_response(
-                    wrapped, instance, args, kwargs, handler=ClaudeAgentSDKInstrumentor._handler
+                    wrapped,
+                    instance,
+                    args,
+                    kwargs,
+                    handler=ClaudeAgentSDKInstrumentor._handler,
                 ),
             )
         except Exception as e:
@@ -154,7 +166,11 @@ def _instrument(self, **kwargs: Any) -> None:
                 module="claude_agent_sdk",
                 name="query",
                 wrapper=lambda wrapped, instance, args, kwargs: wrap_query(
-                    wrapped, instance, args, kwargs, handler=ClaudeAgentSDKInstrumentor._handler
+                    wrapped,
+                    instance,
+                    args,
+                    kwargs,
+                    handler=ClaudeAgentSDKInstrumentor._handler,
                 ),
             )
         except Exception as e:
diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/instrumentation/claude_agent_sdk/patch.py b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/instrumentation/claude_agent_sdk/patch.py
index b10e97f05..01da33512 100644
--- a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/instrumentation/claude_agent_sdk/patch.py
+++ b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/instrumentation/claude_agent_sdk/patch.py
@@ -24,6 +24,7 @@
 from opentelemetry import context as otel_context
 from opentelemetry.instrumentation.claude_agent_sdk.context import (
     clear_parent_invocation,
+    get_parent_invocation,
     set_parent_invocation,
 )
 from opentelemetry.instrumentation.claude_agent_sdk.hooks import (
@@ -106,6 +107,7 @@ def _create_tool_spans_from_message(
             )
             handler.start_execute_tool(tool_invocation)
             _client_managed_runs[tool_use_id] = tool_invocation
+
         except Exception as e:
             logger.warning(f"Failed to create tool span for {tool_name}: {e}")
 
@@ -135,7 +137,7 @@ def _close_tool_spans_from_message(
 
                 tool_invocation.tool_call_result = tool_content
 
-                # Complete span
+                # Complete tool span
                 if is_error:
                     error_msg = (
                         str(tool_content)
@@ -185,8 +187,6 @@ def _process_assistant_message(
     turn_tracker: "AssistantTurnTracker",
     handler: ExtendedTelemetryHandler,
     collected_messages: List[Dict[str, Any]],
-    process_subagents: bool = False,
-    subagent_sessions: Optional[Dict[str, InvokeAgentInvocation]] = None,
 ) -> None:
     """Process AssistantMessage: create LLM turn, extract parts, create tool spans."""
     parts = _extract_message_parts(msg)
@@ -230,15 +230,7 @@ def _process_assistant_message(
 
         turn_tracker.close_llm_turn()
 
-    if process_subagents and subagent_sessions is not None:
-        _handle_task_subagents(
-            msg, agent_invocation, subagent_sessions, handler
-        )
-
-    exclude_tools = ["Task"] if process_subagents else []
-    _create_tool_spans_from_message(
-        msg, handler, exclude_tool_names=exclude_tools
-    )
+    _create_tool_spans_from_message(msg, handler)
 
 
 def _process_user_message(
@@ -274,6 +266,88 @@ def _process_result_message(
     _update_token_usage(agent_invocation, turn_tracker, msg)
 
 
+async def _process_agent_invocation_stream(
+    wrapped_stream,
+    handler: ExtendedTelemetryHandler,
+    model: str,
+    prompt: str,
+) -> Any:
+    """Unified handler for processing agent invocation stream.
+
+    Yields:
+        Messages from the wrapped stream
+    """
+    agent_invocation = InvokeAgentInvocation(
+        provider=infer_provider_from_base_url(),
+        agent_name="claude-agent",
+        request_model=model,
+        conversation_id="",
+        input_messages=[
+            InputMessage(role="user", parts=[Text(content=prompt)])
+        ]
+        if prompt
+        else [],
+    )
+
+    # Clear context to create a new root trace for each independent query
+    otel_context.attach(otel_context.Context())
+    handler.start_invoke_agent(agent_invocation)
+    set_parent_invocation(agent_invocation)
+
+    query_start_time = time.time()
+    turn_tracker = AssistantTurnTracker(
+        handler, query_start_time=query_start_time
+    )
+
+    collected_messages: List[Dict[str, Any]] = []
+
+    try:
+        async for msg in wrapped_stream:
+            msg_type = type(msg).__name__
+
+            if msg_type == "AssistantMessage":
+                _process_assistant_message(
+                    msg,
+                    model,
+                    prompt,
+                    agent_invocation,
+                    turn_tracker,
+                    handler,
+                    collected_messages,
+                )
+            elif msg_type == "UserMessage":
+                _process_user_message(
+                    msg,
+                    turn_tracker,
+                    handler,
+                    collected_messages,
+                )
+            elif msg_type == "ResultMessage":
+                _process_result_message(msg, agent_invocation, turn_tracker)
+
+            yield msg
+
+        # Handle successful completion
+        handler.stop_invoke_agent(agent_invocation)
+
+    except Exception as e:
+        # Handle error
+        error_msg = str(e)
+        if agent_invocation.span:
+            agent_invocation.span.set_attribute("error.type", type(e).__name__)
+            agent_invocation.span.set_attribute("error.message", error_msg)
+        handler.fail_invoke_agent(
+            agent_invocation, error=Error(message=error_msg, type=type(e))
+        )
+
+        raise
+    finally:
+        # Cleanup
+        turn_tracker.close()
+        clear_active_tool_runs()
+        clear_parent_invocation()
+
+
 class AssistantTurnTracker:
     """Track LLM invocations (assistant turns) in a Claude Agent conversation."""
 
@@ -452,72 +526,6 @@ def wrap_claude_client_query(wrapped, instance, args, kwargs, handler=None):
     return wrapped(*args, **kwargs)
 
 
-def _handle_task_subagents(
-    msg: Any,
-    agent_invocation: InvokeAgentInvocation,
-    subagent_sessions: Dict[str, InvokeAgentInvocation],
-    handler: ExtendedTelemetryHandler,
-) -> None:
-    """Process Task tool uses (subagents) in an assistant message."""
-    if not hasattr(msg, "content"):
-        return
-
-    parent_tool_use_id = getattr(msg, "parent_tool_use_id", None)
-
-    for block in msg.content:
-        if type(block).__name__ != "ToolUseBlock":
-            continue
-
-        try:
-            tool_use_id = getattr(block, "id", None)
-            tool_name = getattr(block, "name", "unknown_tool")
-            tool_input = getattr(block, "input", {})
-
-            if not tool_use_id:
-                continue
-
-            # Only handle Task subagents here (Regular tools are handled by hooks)
-            if tool_name == "Task" and not parent_tool_use_id:
-                # Extract subagent name from input
-                subagent_name = (
-                    tool_input.get("subagent_type")
-                    or (
-                        tool_input.get("description", "").split()[0]
-                        if tool_input.get("description")
-                        else None
-                    )
-                    or "unknown-agent"
-                )
-
-                # Create subagent session span
-                subagent_invocation = InvokeAgentInvocation(
-                    provider=infer_provider_from_base_url(),
-                    agent_name=subagent_name,
-                    request_model=agent_invocation.request_model,
-                    conversation_id="",
-                    input_messages=[
-                        InputMessage(
-                            role="user", parts=[Text(content=str(tool_input))]
-                        )
-                    ],
-                    attributes={
-                        "subagent_type": tool_input.get("subagent_type", ""),
-                        "parent_tool_use_id": parent_tool_use_id or "",
-                    },
-                )
-
-                handler.start_invoke_agent(subagent_invocation)
-                subagent_sessions[tool_use_id] = subagent_invocation
-
-                # Mark as client-managed so hooks don't duplicate it
-                _client_managed_runs[tool_use_id] = ExecuteToolInvocation(
-                    tool_name="Task",
-                    tool_call_id=tool_use_id,
-                    tool_call_arguments=tool_input,
-                )
-
-        except Exception as e:
-            logger.warning(f"Failed to create subagent session: {e}")
 
 
 async def wrap_claude_client_receive_response(
@@ -538,79 +546,13 @@ async def wrap_claude_client_receive_response(
     if hasattr(instance, "options") and instance.options:
         model = getattr(instance.options, "model", "unknown")
 
-    agent_invocation = InvokeAgentInvocation(
-        provider=infer_provider_from_base_url(),
-        agent_name="claude-agent",
-        request_model=model,
-        conversation_id="",
-        input_messages=[
-            InputMessage(role="user", parts=[Text(content=prompt)])
-        ]
-        if prompt
-        else [],
-    )
-
-    # Clear context to create a new root trace for each independent query
-    otel_context.attach(otel_context.Context())
-    handler.start_invoke_agent(agent_invocation)
-    set_parent_invocation(agent_invocation)
-
-    query_start_time = time.time()
-    turn_tracker = AssistantTurnTracker(
-        handler, query_start_time=query_start_time
-    )
-
-    collected_messages: List[Dict[str, Any]] = []
-    subagent_sessions: Dict[str, InvokeAgentInvocation] = {}
-
-    try:
-        async for msg in wrapped(*args, **kwargs):
-            msg_type = type(msg).__name__
-
-            if msg_type == "AssistantMessage":
-                _process_assistant_message(
-                    msg,
-                    model,
-                    prompt,
-                    agent_invocation,
-                    turn_tracker,
-                    handler,
-                    collected_messages,
-                    process_subagents=True,
-                    subagent_sessions=subagent_sessions,
-                )
-
-            elif msg_type == "UserMessage":
-                _process_user_message(
-                    msg, turn_tracker, handler, collected_messages
-                )
-
-            elif msg_type == "ResultMessage":
-                _process_result_message(msg, agent_invocation, turn_tracker)
-
-            yield msg
-
-        handler.stop_invoke_agent(agent_invocation)
-
-        for subagent_invocation in subagent_sessions.values():
-            try:
-                handler.stop_invoke_agent(subagent_invocation)
-            except Exception as e:
-                logger.warning(f"Failed to complete subagent session: {e}")
-
-    except Exception as e:
-        error_msg = str(e)
-        if agent_invocation.span:
-            agent_invocation.span.set_attribute("error.type", type(e).__name__)
-            agent_invocation.span.set_attribute("error.message", error_msg)
-        handler.fail_invoke_agent(
-            agent_invocation, error=Error(message=error_msg, type=type(e))
-        )
-        raise
-    finally:
-        turn_tracker.close()
-        clear_active_tool_runs()
-        clear_parent_invocation()
+    async for msg in _process_agent_invocation_stream(
+        wrapped(*args, **kwargs),
+        handler=handler,
+        model=model,
+        prompt=prompt,
+    ):
+        yield msg
 
 
 async def wrap_query(wrapped, instance, args, kwargs, handler=None):
@@ -635,73 +577,12 @@ async def wrap_query(wrapped, instance, args, kwargs, handler=None):
             logger.warning(f"Failed to create ClaudeAgentOptions: {e}")
 
     model = get_model_from_options_or_env(options)
-
     prompt_str = str(prompt) if isinstance(prompt, str) else ""
-    agent_invocation = InvokeAgentInvocation(
-        provider=infer_provider_from_base_url(),
-        agent_name="claude-agent",
-        request_model=model,
-        conversation_id="",
-        input_messages=[
-            InputMessage(role="user", parts=[Text(content=prompt_str)])
-        ]
-        if prompt_str
-        else [],
-    )
-
-    # Clear context to create a new root trace for each independent query
-    otel_context.attach(otel_context.Context())
-    handler.start_invoke_agent(agent_invocation)
-    set_parent_invocation(agent_invocation)
-
-    query_start_time = time.time()
-    turn_tracker = AssistantTurnTracker(
-        handler, query_start_time=query_start_time
-    )
-
-    collected_messages: List[Dict[str, Any]] = []
-
-    try:
-        async for message in wrapped(*args, **kwargs):
-            msg_type = type(message).__name__
 
-            if msg_type == "AssistantMessage":
-                _process_assistant_message(
-                    message,
-                    model,
-                    prompt_str,
-                    agent_invocation,
-                    turn_tracker,
-                    handler,
-                    collected_messages,
-                    process_subagents=False,
-                    subagent_sessions=None,
-                )
-
-            elif msg_type == "UserMessage":
-                _process_user_message(
-                    message, turn_tracker, handler, collected_messages
-                )
-
-            elif msg_type == "ResultMessage":
-                _process_result_message(
-                    message, agent_invocation, turn_tracker
-                )
-
-            yield message
-
-        handler.stop_invoke_agent(agent_invocation)
-
-    except Exception as e:
-        error_msg = str(e)
-        if agent_invocation.span:
-            agent_invocation.span.set_attribute("error.type", type(e).__name__)
-            agent_invocation.span.set_attribute("error.message", error_msg)
-        handler.fail_invoke_agent(
-            agent_invocation, error=Error(message=error_msg, type=type(e))
-        )
-        raise
-    finally:
-        turn_tracker.close()
-        clear_active_tool_runs()
-        clear_parent_invocation()
+    async for message in _process_agent_invocation_stream(
+        wrapped(*args, **kwargs),
+        handler=handler,
+        model=model,
+        prompt=prompt_str,
+    ):
+        yield message

From a758c9b4d4847af0b614a3c5ac482f0fae1b5bc6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E6=B5=81=E5=B1=BF?= <caishipeng.csp@alibaba-inc.com>
Date: Tue, 20 Jan 2026 00:21:51 +0800
Subject: [PATCH 14/23] [fix] fix tool span error

Change-Id: I9694ccab2f5bf9eb8d24deb644b78f60555a5bdf
Co-developed-by: Cursor <noreply@cursor.com>
---
 .../instrumentation/claude_agent_sdk/hooks.py | 124 +++++++++++++++---
 .../instrumentation/claude_agent_sdk/patch.py |  80 ++++++++++-
 2 files changed, 180 insertions(+), 24 deletions(-)

diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/instrumentation/claude_agent_sdk/hooks.py b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/instrumentation/claude_agent_sdk/hooks.py
index a3d73ea1f..63a12a04e 100644
--- a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/instrumentation/claude_agent_sdk/hooks.py
+++ b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/instrumentation/claude_agent_sdk/hooks.py
@@ -26,6 +26,7 @@
 from opentelemetry.util.genai.extended_types import ExecuteToolInvocation
 from opentelemetry.util.genai.types import Error
 
+
 if TYPE_CHECKING:
     from claude_agent_sdk import (
         HookContext,
@@ -43,6 +44,10 @@
 # Key: tool_use_id, Value: tool_invocation
 _client_managed_runs: Dict[str, ExecuteToolInvocation] = {}
 
+# Storage for Task tool invocations, used to parent subagent tool calls
+# Key: session_id, Value: Task tool invocation
+_task_tool_invocations: Dict[str, ExecuteToolInvocation] = {}
+
 
 async def pre_tool_use_hook(
     input_data: "HookInput",
@@ -76,6 +81,14 @@ async def pre_tool_use_hook(
     try:
         handler = get_extended_telemetry_handler()
         parent_invocation = get_parent_invocation()
+        
+        # For subagent tool calls: if there's an active Task tool for this session,
+        # use Task tool as parent instead of subagent's invoke_agent
+        if session_id and session_id in _task_tool_invocations:
+            task_tool_invocation = _task_tool_invocations[session_id]
+            if task_tool_invocation and task_tool_invocation.span:
+                # Use Task tool as parent for subagent tool calls
+                parent_invocation = task_tool_invocation
 
         if not parent_invocation:
             return {}
@@ -100,19 +113,62 @@ async def pre_tool_use_hook(
         if parent_invocation and parent_invocation.span:
             # Create child span in parent's context
             ctx = set_span_in_context(parent_invocation.span)
-            token = otel_context.attach(ctx)
+            parent_token = otel_context.attach(ctx)
 
             try:
+                # start_execute_tool will create tool span and attach tool context
                 handler.start_execute_tool(tool_invocation)
+                
+                # For Task tool: keep tool context active so subagent spans can be children
+                # For other tools: immediately detach tool context to restore parent context
+                # This ensures subsequent spans (LLM, other tools) are created
+                # as siblings of tool span, not children (except for Task tool)
+                if tool_name != "Task":
+                    # Immediately detach tool context for non-Task tools
+                    if tool_invocation.context_token is not None:
+                        try:
+                            otel_context.detach(tool_invocation.context_token)
+                            tool_invocation.context_token = None
+                        except (ValueError, RuntimeError):
+                            # Token already detached or from different context, ignore
+                            tool_invocation.context_token = None
+                        except Exception:
+                            # Other errors: set to None to prevent handler from trying to detach again
+                            # This ensures handler.stop_execute_tool won't fail
+                            tool_invocation.context_token = None
+                # For Task tool, keep context_token attached so subagent spans can be children
+                # Also save Task tool invocation for subagent tool calls to use as parent
+                if tool_name == "Task" and session_id:
+                    _task_tool_invocations[session_id] = tool_invocation
             finally:
-                # Detach after starting span
-                try:
-                    otel_context.detach(token)
-                except Exception:
-                    pass  # Ignore detach errors
+                # Detach parent context to restore original context
+                if parent_token is not None:
+                    try:
+                        otel_context.detach(parent_token)
+                    except (ValueError, RuntimeError):
+                        # Token already detached or from different context, ignore
+                        pass
+                    except Exception as e:
+                        # Other errors, log but don't raise
+                        logger.debug(f"Failed to detach parent_token: {e}", exc_info=True)
         else:
             # Fallback to auto-parenting (may not work due to broken context)
             handler.start_execute_tool(tool_invocation)
+            # For non-Task tools, detach tool context immediately to avoid polluting context
+            if tool_name != "Task":
+                if tool_invocation.context_token is not None:
+                    try:
+                        otel_context.detach(tool_invocation.context_token)
+                        tool_invocation.context_token = None
+                    except (ValueError, RuntimeError):
+                        # Token already detached or from different context, ignore
+                        tool_invocation.context_token = None
+                    except Exception:
+                        # Other errors, keep token for handler to handle
+                        pass
+            # Save Task tool invocation for subagent tool calls
+            if tool_name == "Task" and session_id:
+                _task_tool_invocations[session_id] = tool_invocation
 
         _active_tool_runs[tool_use_id] = (tool_invocation, handler)
 
@@ -192,20 +248,50 @@ async def post_tool_use_hook(
         # Set response (will be auto-formatted to gen_ai.tool.call.result by telemetry handler)
         tool_invocation.tool_call_result = tool_response
 
-        is_error = False
-        if isinstance(tool_response, dict):
-            is_error_value = tool_response.get("is_error")
-            is_error = is_error_value is True
+        # Ensure we're in parent context before stopping tool span
+        # This prevents subsequent spans from being created as children of tool span
+        parent_invocation = get_parent_invocation()
+        parent_token = None
+        if parent_invocation and parent_invocation.span:
+            ctx = set_span_in_context(parent_invocation.span)
+            parent_token = otel_context.attach(ctx)
 
-        if is_error:
-            error_msg = (
-                str(tool_response) if tool_response else "Tool execution error"
-            )
-            handler.fail_execute_tool(
-                tool_invocation, Error(message=error_msg, type=RuntimeError)
-            )
-        else:
-            handler.stop_execute_tool(tool_invocation)
+        try:
+            is_error = False
+            if isinstance(tool_response, dict):
+                is_error_value = tool_response.get("is_error")
+                is_error = is_error_value is True
+
+            # For non-Task tools: context_token was already set to None in pre_tool_use_hook
+            # For Task tools: context_token is still valid, handler will detach it
+            if is_error:
+                error_msg = (
+                    str(tool_response) if tool_response else "Tool execution error"
+                )
+                handler.fail_execute_tool(
+                    tool_invocation, Error(message=error_msg, type=RuntimeError)
+                )
+            else:
+                handler.stop_execute_tool(tool_invocation)
+        finally:
+            # Clean up Task tool invocation from storage
+            if tool_invocation.tool_name == "Task":
+                session_id = input_data.get("session_id", "")
+                if session_id and session_id in _task_tool_invocations:
+                    del _task_tool_invocations[session_id]
+            
+            # Restore parent context after stopping tool span
+            # This ensures subsequent spans (LLM, other tools) are created
+            # as siblings of tool span, not children
+            if parent_token is not None:
+                try:
+                    otel_context.detach(parent_token)
+                except (ValueError, RuntimeError):
+                    # Token already detached or from different context, ignore
+                    pass
+                except Exception as e:
+                    # Other errors, log but don't raise
+                    logger.debug(f"Failed to detach parent_token: {e}", exc_info=True)
 
     except Exception as e:
         logger.warning(
diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/instrumentation/claude_agent_sdk/patch.py b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/instrumentation/claude_agent_sdk/patch.py
index 01da33512..b9bbf73cd 100644
--- a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/instrumentation/claude_agent_sdk/patch.py
+++ b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/instrumentation/claude_agent_sdk/patch.py
@@ -105,7 +105,48 @@ def _create_tool_spans_from_message(
                 tool_call_arguments=tool_input,
                 tool_description=tool_name,
             )
-            handler.start_execute_tool(tool_invocation)
+            
+            # Ensure we're in parent context before starting tool span
+            parent_invocation = get_parent_invocation()
+            parent_token = None
+            if parent_invocation and parent_invocation.span:
+                from opentelemetry.trace import set_span_in_context
+                ctx = set_span_in_context(parent_invocation.span)
+                parent_token = otel_context.attach(ctx)
+            
+            try:
+                handler.start_execute_tool(tool_invocation)
+                
+                # For Task tool: keep tool context active so subagent spans can be children
+                # For other tools: immediately detach tool context to restore parent context
+                # This ensures subsequent spans (LLM, other tools) are created
+                # as siblings of tool span, not children (except for Task tool)
+                if tool_name != "Task":
+                    # Immediately detach tool context for non-Task tools
+                    if tool_invocation.context_token is not None:
+                        try:
+                            otel_context.detach(tool_invocation.context_token)
+                            tool_invocation.context_token = None
+                        except (ValueError, RuntimeError):
+                            # Token already detached or from different context, ignore
+                            tool_invocation.context_token = None
+                        except Exception:
+                            # Other errors: set to None to prevent handler from trying to detach again
+                            # This ensures handler.stop_execute_tool won't fail
+                            tool_invocation.context_token = None
+                # For Task tool, keep context_token attached so subagent spans can be children
+            finally:
+                # Restore context after starting tool span
+                if parent_token is not None:
+                    try:
+                        otel_context.detach(parent_token)
+                    except (ValueError, RuntimeError):
+                        # Token already detached or from different context, ignore
+                        pass
+                    except Exception as e:
+                        # Other errors, log but don't raise
+                        logger.debug(f"Failed to detach parent_token: {e}", exc_info=True)
+            
             _client_managed_runs[tool_use_id] = tool_invocation
 
         except Exception as e:
@@ -130,6 +171,10 @@ def _close_tool_spans_from_message(
             if tool_use_id and tool_use_id in _client_managed_runs:
                 tool_invocation = _client_managed_runs.pop(tool_use_id)
 
+                # For Task tool: let stop_execute_tool/fail_execute_tool handle detach
+                # They will check if context_token is None and handle it properly
+                # We don't need to manually detach here, as the handler methods will do it
+
                 # Set tool response
                 tool_content = getattr(block, "content", None)
                 is_error_value = getattr(block, "is_error", None)
@@ -138,6 +183,8 @@ def _close_tool_spans_from_message(
                 tool_invocation.tool_call_result = tool_content
 
                 # Complete tool span
+                # For non-Task tools: context_token was already set to None in _create_tool_spans_from_message
+                # For Task tools: context_token is still valid, handler will detach it
                 if is_error:
                     error_msg = (
                         str(tool_content)
@@ -411,7 +458,28 @@ def start_llm_turn(
             input_messages=input_messages,
         )
 
-        self.handler.start_llm(llm_invocation)
+        # Ensure we're in parent context before starting LLM span
+        # This prevents LLM span from being created as child of tool span
+        parent_invocation = get_parent_invocation()
+        parent_token = None
+        if parent_invocation and parent_invocation.span:
+            from opentelemetry.trace import set_span_in_context
+            ctx = set_span_in_context(parent_invocation.span)
+            parent_token = otel_context.attach(ctx)
+
+        try:
+            self.handler.start_llm(llm_invocation)
+        finally:
+            # Restore context after starting LLM span
+            if parent_token is not None:
+                try:
+                    otel_context.detach(parent_token)
+                except (ValueError, RuntimeError):
+                    # Token already detached or from different context, ignore
+                    pass
+                except Exception as e:
+                    # Other errors, log but don't raise
+                    logger.debug(f"Failed to detach parent_token: {e}", exc_info=True)
 
         # Override span start time
         if llm_invocation.span and start_time:
@@ -542,9 +610,11 @@ async def wrap_claude_client_receive_response(
         return
 
     prompt = getattr(instance, "_otel_prompt", "") or ""
-    model = "unknown"
-    if hasattr(instance, "options") and instance.options:
-        model = getattr(instance.options, "model", "unknown")
+    
+    # Use get_model_from_options_or_env to get model from options or environment variables
+    # This ensures consistency with wrap_query and follows Claude CLI behavior
+    options = getattr(instance, "options", None)
+    model = get_model_from_options_or_env(options)
 
     async for msg in _process_agent_invocation_stream(
         wrapped(*args, **kwargs),

From 4d94b4adcab8e8abf1f4ca1d3e72ab775e14b07c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E6=B5=81=E5=B1=BF?= <caishipeng.csp@alibaba-inc.com>
Date: Tue, 20 Jan 2026 10:00:01 +0800
Subject: [PATCH 15/23] [fix] fix ruff failed

Change-Id: I50bda0c04949a1e536106e9221d2755b23aba8ae
Co-developed-by: Cursor <noreply@cursor.com>
---
 .../claude_agent_sdk/__init__.py              |   5 -
 .../instrumentation/claude_agent_sdk/hooks.py | 124 +++---------------
 .../instrumentation/claude_agent_sdk/patch.py |  83 +-----------
 3 files changed, 24 insertions(+), 188 deletions(-)

diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/instrumentation/claude_agent_sdk/__init__.py b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/instrumentation/claude_agent_sdk/__init__.py
index 591f911c6..7e34fa169 100644
--- a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/instrumentation/claude_agent_sdk/__init__.py
+++ b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/instrumentation/claude_agent_sdk/__init__.py
@@ -92,14 +92,12 @@ def _instrument(self, **kwargs: Any) -> None:
         meter_provider = kwargs.get("meter_provider")
         logger_provider = kwargs.get("logger_provider")
 
-        # Create ExtendedTelemetryHandler
         ClaudeAgentSDKInstrumentor._handler = ExtendedTelemetryHandler(
             tracer_provider=tracer_provider,
             meter_provider=meter_provider,
             logger_provider=logger_provider,
         )
 
-        # Wrap ClaudeSDKClient.__init__
         try:
             wrap_function_wrapper(
                 module="claude_agent_sdk",
@@ -120,7 +118,6 @@ def _instrument(self, **kwargs: Any) -> None:
                 f"Failed to instrument ClaudeSDKClient.__init__: {e}"
             )
 
-        # Wrap ClaudeSDKClient.query
         try:
             wrap_function_wrapper(
                 module="claude_agent_sdk",
@@ -139,7 +136,6 @@ def _instrument(self, **kwargs: Any) -> None:
         except Exception as e:
             logger.warning(f"Failed to instrument ClaudeSDKClient.query: {e}")
 
-        # Wrap ClaudeSDKClient.receive_response
         try:
             wrap_function_wrapper(
                 module="claude_agent_sdk",
@@ -160,7 +156,6 @@ def _instrument(self, **kwargs: Any) -> None:
                 f"Failed to instrument ClaudeSDKClient.receive_response: {e}"
             )
 
-        # Wrap standalone query() function
         try:
             wrap_function_wrapper(
                 module="claude_agent_sdk",
diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/instrumentation/claude_agent_sdk/hooks.py b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/instrumentation/claude_agent_sdk/hooks.py
index 63a12a04e..a3d73ea1f 100644
--- a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/instrumentation/claude_agent_sdk/hooks.py
+++ b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/instrumentation/claude_agent_sdk/hooks.py
@@ -26,7 +26,6 @@
 from opentelemetry.util.genai.extended_types import ExecuteToolInvocation
 from opentelemetry.util.genai.types import Error
 
-
 if TYPE_CHECKING:
     from claude_agent_sdk import (
         HookContext,
@@ -44,10 +43,6 @@
 # Key: tool_use_id, Value: tool_invocation
 _client_managed_runs: Dict[str, ExecuteToolInvocation] = {}
 
-# Storage for Task tool invocations, used to parent subagent tool calls
-# Key: session_id, Value: Task tool invocation
-_task_tool_invocations: Dict[str, ExecuteToolInvocation] = {}
-
 
 async def pre_tool_use_hook(
     input_data: "HookInput",
@@ -81,14 +76,6 @@ async def pre_tool_use_hook(
     try:
         handler = get_extended_telemetry_handler()
         parent_invocation = get_parent_invocation()
-        
-        # For subagent tool calls: if there's an active Task tool for this session,
-        # use Task tool as parent instead of subagent's invoke_agent
-        if session_id and session_id in _task_tool_invocations:
-            task_tool_invocation = _task_tool_invocations[session_id]
-            if task_tool_invocation and task_tool_invocation.span:
-                # Use Task tool as parent for subagent tool calls
-                parent_invocation = task_tool_invocation
 
         if not parent_invocation:
             return {}
@@ -113,62 +100,19 @@ async def pre_tool_use_hook(
         if parent_invocation and parent_invocation.span:
             # Create child span in parent's context
             ctx = set_span_in_context(parent_invocation.span)
-            parent_token = otel_context.attach(ctx)
+            token = otel_context.attach(ctx)
 
             try:
-                # start_execute_tool will create tool span and attach tool context
                 handler.start_execute_tool(tool_invocation)
-                
-                # For Task tool: keep tool context active so subagent spans can be children
-                # For other tools: immediately detach tool context to restore parent context
-                # This ensures subsequent spans (LLM, other tools) are created
-                # as siblings of tool span, not children (except for Task tool)
-                if tool_name != "Task":
-                    # Immediately detach tool context for non-Task tools
-                    if tool_invocation.context_token is not None:
-                        try:
-                            otel_context.detach(tool_invocation.context_token)
-                            tool_invocation.context_token = None
-                        except (ValueError, RuntimeError):
-                            # Token already detached or from different context, ignore
-                            tool_invocation.context_token = None
-                        except Exception:
-                            # Other errors: set to None to prevent handler from trying to detach again
-                            # This ensures handler.stop_execute_tool won't fail
-                            tool_invocation.context_token = None
-                # For Task tool, keep context_token attached so subagent spans can be children
-                # Also save Task tool invocation for subagent tool calls to use as parent
-                if tool_name == "Task" and session_id:
-                    _task_tool_invocations[session_id] = tool_invocation
             finally:
-                # Detach parent context to restore original context
-                if parent_token is not None:
-                    try:
-                        otel_context.detach(parent_token)
-                    except (ValueError, RuntimeError):
-                        # Token already detached or from different context, ignore
-                        pass
-                    except Exception as e:
-                        # Other errors, log but don't raise
-                        logger.debug(f"Failed to detach parent_token: {e}", exc_info=True)
+                # Detach after starting span
+                try:
+                    otel_context.detach(token)
+                except Exception:
+                    pass  # Ignore detach errors
         else:
             # Fallback to auto-parenting (may not work due to broken context)
             handler.start_execute_tool(tool_invocation)
-            # For non-Task tools, detach tool context immediately to avoid polluting context
-            if tool_name != "Task":
-                if tool_invocation.context_token is not None:
-                    try:
-                        otel_context.detach(tool_invocation.context_token)
-                        tool_invocation.context_token = None
-                    except (ValueError, RuntimeError):
-                        # Token already detached or from different context, ignore
-                        tool_invocation.context_token = None
-                    except Exception:
-                        # Other errors, keep token for handler to handle
-                        pass
-            # Save Task tool invocation for subagent tool calls
-            if tool_name == "Task" and session_id:
-                _task_tool_invocations[session_id] = tool_invocation
 
         _active_tool_runs[tool_use_id] = (tool_invocation, handler)
 
@@ -248,50 +192,20 @@ async def post_tool_use_hook(
         # Set response (will be auto-formatted to gen_ai.tool.call.result by telemetry handler)
         tool_invocation.tool_call_result = tool_response
 
-        # Ensure we're in parent context before stopping tool span
-        # This prevents subsequent spans from being created as children of tool span
-        parent_invocation = get_parent_invocation()
-        parent_token = None
-        if parent_invocation and parent_invocation.span:
-            ctx = set_span_in_context(parent_invocation.span)
-            parent_token = otel_context.attach(ctx)
-
-        try:
-            is_error = False
-            if isinstance(tool_response, dict):
-                is_error_value = tool_response.get("is_error")
-                is_error = is_error_value is True
+        is_error = False
+        if isinstance(tool_response, dict):
+            is_error_value = tool_response.get("is_error")
+            is_error = is_error_value is True
 
-            # For non-Task tools: context_token was already set to None in pre_tool_use_hook
-            # For Task tools: context_token is still valid, handler will detach it
-            if is_error:
-                error_msg = (
-                    str(tool_response) if tool_response else "Tool execution error"
-                )
-                handler.fail_execute_tool(
-                    tool_invocation, Error(message=error_msg, type=RuntimeError)
-                )
-            else:
-                handler.stop_execute_tool(tool_invocation)
-        finally:
-            # Clean up Task tool invocation from storage
-            if tool_invocation.tool_name == "Task":
-                session_id = input_data.get("session_id", "")
-                if session_id and session_id in _task_tool_invocations:
-                    del _task_tool_invocations[session_id]
-            
-            # Restore parent context after stopping tool span
-            # This ensures subsequent spans (LLM, other tools) are created
-            # as siblings of tool span, not children
-            if parent_token is not None:
-                try:
-                    otel_context.detach(parent_token)
-                except (ValueError, RuntimeError):
-                    # Token already detached or from different context, ignore
-                    pass
-                except Exception as e:
-                    # Other errors, log but don't raise
-                    logger.debug(f"Failed to detach parent_token: {e}", exc_info=True)
+        if is_error:
+            error_msg = (
+                str(tool_response) if tool_response else "Tool execution error"
+            )
+            handler.fail_execute_tool(
+                tool_invocation, Error(message=error_msg, type=RuntimeError)
+            )
+        else:
+            handler.stop_execute_tool(tool_invocation)
 
     except Exception as e:
         logger.warning(
diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/instrumentation/claude_agent_sdk/patch.py b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/instrumentation/claude_agent_sdk/patch.py
index b9bbf73cd..7c9c90391 100644
--- a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/instrumentation/claude_agent_sdk/patch.py
+++ b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/instrumentation/claude_agent_sdk/patch.py
@@ -24,7 +24,6 @@
 from opentelemetry import context as otel_context
 from opentelemetry.instrumentation.claude_agent_sdk.context import (
     clear_parent_invocation,
-    get_parent_invocation,
     set_parent_invocation,
 )
 from opentelemetry.instrumentation.claude_agent_sdk.hooks import (
@@ -105,48 +104,7 @@ def _create_tool_spans_from_message(
                 tool_call_arguments=tool_input,
                 tool_description=tool_name,
             )
-            
-            # Ensure we're in parent context before starting tool span
-            parent_invocation = get_parent_invocation()
-            parent_token = None
-            if parent_invocation and parent_invocation.span:
-                from opentelemetry.trace import set_span_in_context
-                ctx = set_span_in_context(parent_invocation.span)
-                parent_token = otel_context.attach(ctx)
-            
-            try:
-                handler.start_execute_tool(tool_invocation)
-                
-                # For Task tool: keep tool context active so subagent spans can be children
-                # For other tools: immediately detach tool context to restore parent context
-                # This ensures subsequent spans (LLM, other tools) are created
-                # as siblings of tool span, not children (except for Task tool)
-                if tool_name != "Task":
-                    # Immediately detach tool context for non-Task tools
-                    if tool_invocation.context_token is not None:
-                        try:
-                            otel_context.detach(tool_invocation.context_token)
-                            tool_invocation.context_token = None
-                        except (ValueError, RuntimeError):
-                            # Token already detached or from different context, ignore
-                            tool_invocation.context_token = None
-                        except Exception:
-                            # Other errors: set to None to prevent handler from trying to detach again
-                            # This ensures handler.stop_execute_tool won't fail
-                            tool_invocation.context_token = None
-                # For Task tool, keep context_token attached so subagent spans can be children
-            finally:
-                # Restore context after starting tool span
-                if parent_token is not None:
-                    try:
-                        otel_context.detach(parent_token)
-                    except (ValueError, RuntimeError):
-                        # Token already detached or from different context, ignore
-                        pass
-                    except Exception as e:
-                        # Other errors, log but don't raise
-                        logger.debug(f"Failed to detach parent_token: {e}", exc_info=True)
-            
+            handler.start_execute_tool(tool_invocation)
             _client_managed_runs[tool_use_id] = tool_invocation
 
         except Exception as e:
@@ -171,10 +129,6 @@ def _close_tool_spans_from_message(
             if tool_use_id and tool_use_id in _client_managed_runs:
                 tool_invocation = _client_managed_runs.pop(tool_use_id)
 
-                # For Task tool: let stop_execute_tool/fail_execute_tool handle detach
-                # They will check if context_token is None and handle it properly
-                # We don't need to manually detach here, as the handler methods will do it
-
                 # Set tool response
                 tool_content = getattr(block, "content", None)
                 is_error_value = getattr(block, "is_error", None)
@@ -183,8 +137,6 @@ def _close_tool_spans_from_message(
                 tool_invocation.tool_call_result = tool_content
 
                 # Complete tool span
-                # For non-Task tools: context_token was already set to None in _create_tool_spans_from_message
-                # For Task tools: context_token is still valid, handler will detach it
                 if is_error:
                     error_msg = (
                         str(tool_content)
@@ -458,28 +410,7 @@ def start_llm_turn(
             input_messages=input_messages,
         )
 
-        # Ensure we're in parent context before starting LLM span
-        # This prevents LLM span from being created as child of tool span
-        parent_invocation = get_parent_invocation()
-        parent_token = None
-        if parent_invocation and parent_invocation.span:
-            from opentelemetry.trace import set_span_in_context
-            ctx = set_span_in_context(parent_invocation.span)
-            parent_token = otel_context.attach(ctx)
-
-        try:
-            self.handler.start_llm(llm_invocation)
-        finally:
-            # Restore context after starting LLM span
-            if parent_token is not None:
-                try:
-                    otel_context.detach(parent_token)
-                except (ValueError, RuntimeError):
-                    # Token already detached or from different context, ignore
-                    pass
-                except Exception as e:
-                    # Other errors, log but don't raise
-                    logger.debug(f"Failed to detach parent_token: {e}", exc_info=True)
+        self.handler.start_llm(llm_invocation)
 
         # Override span start time
         if llm_invocation.span and start_time:
@@ -594,8 +525,6 @@ def wrap_claude_client_query(wrapped, instance, args, kwargs, handler=None):
     return wrapped(*args, **kwargs)
 
 
-
-
 async def wrap_claude_client_receive_response(
     wrapped, instance, args, kwargs, handler=None
 ):
@@ -610,11 +539,9 @@ async def wrap_claude_client_receive_response(
         return
 
     prompt = getattr(instance, "_otel_prompt", "") or ""
-    
-    # Use get_model_from_options_or_env to get model from options or environment variables
-    # This ensures consistency with wrap_query and follows Claude CLI behavior
-    options = getattr(instance, "options", None)
-    model = get_model_from_options_or_env(options)
+    model = "unknown"
+    if hasattr(instance, "options") and instance.options:
+        model = getattr(instance.options, "model", "unknown")
 
     async for msg in _process_agent_invocation_stream(
         wrapped(*args, **kwargs),

From d505ce7333d823f5e274aa25ee4e6ee6ccfded00 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E6=B5=81=E5=B1=BF?= <caishipeng.csp@alibaba-inc.com>
Date: Thu, 22 Jan 2026 10:04:01 +0800
Subject: [PATCH 16/23] add comment

Change-Id: I960449e43e8a3522bdafd9c0dbd6fcb7dff15345
Co-developed-by: Cursor <noreply@cursor.com>
---
 .../instrumentation/claude_agent_sdk/hooks.py     | 13 ++++++++++++-
 .../instrumentation/claude_agent_sdk/patch.py     | 12 +++++++++---
 .../tests/test_edge_cases.py                      | 15 ++++++++++++---
 3 files changed, 33 insertions(+), 7 deletions(-)

diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/instrumentation/claude_agent_sdk/hooks.py b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/instrumentation/claude_agent_sdk/hooks.py
index a3d73ea1f..2e2fc07b1 100644
--- a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/instrumentation/claude_agent_sdk/hooks.py
+++ b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/instrumentation/claude_agent_sdk/hooks.py
@@ -109,7 +109,9 @@ async def pre_tool_use_hook(
                 try:
                     otel_context.detach(token)
                 except Exception:
-                    pass  # Ignore detach errors
+                    # Ignore detach errors - context may already be invalid or detached.
+                    # Failing to detach here should not prevent tool execution from proceeding.
+                    pass
         else:
             # Fallback to auto-parenting (may not work due to broken context)
             handler.start_execute_tool(tool_invocation)
@@ -226,6 +228,9 @@ def clear_active_tool_runs() -> None:
     try:
         handler = get_extended_telemetry_handler()
     except Exception:
+        # If we can't get the handler (e.g., instrumentation not initialized),
+        # we still need to clear the tracking dictionaries to prevent memory leaks.
+        # Without a handler, we can't properly end spans, so just clean up state.
         _active_tool_runs.clear()
         _client_managed_runs.clear()
         return
@@ -241,6 +246,9 @@ def clear_active_tool_runs() -> None:
                 ),
             )
         except Exception:
+            # Ignore errors when failing orphaned tools during cleanup.
+            # If the span is already ended or invalid, we don't want to crash.
+            # Best effort cleanup: continue processing remaining tools.
             pass
 
     # End any orphaned tool runs
@@ -254,6 +262,9 @@ def clear_active_tool_runs() -> None:
                 ),
             )
         except Exception:
+            # Ignore errors when failing orphaned tools during cleanup.
+            # If the span is already ended or invalid, we don't want to crash.
+            # Best effort cleanup: continue processing remaining tools.
             pass
 
     _active_tool_runs.clear()
diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/instrumentation/claude_agent_sdk/patch.py b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/instrumentation/claude_agent_sdk/patch.py
index 7c9c90391..3f007df42 100644
--- a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/instrumentation/claude_agent_sdk/patch.py
+++ b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/instrumentation/claude_agent_sdk/patch.py
@@ -411,13 +411,19 @@ def start_llm_turn(
         )
 
         self.handler.start_llm(llm_invocation)
-
-        # Override span start time
+        # Override span start time.
+        # TODO(telemetry): Avoid relying on the private `_start_time` attribute.
+        # The long-term fix is to plumb a public `start_time` parameter through
+        # ExtendedTelemetryHandler.start_llm and the underlying span creation,
+        # so the desired start time can be set via a supported API instead of
+        # mutating internal span state here. Until that is available, we perform
+        # a best-effort adjustment guarded by hasattr and try/except so that
+        # failures do not break tracing.
         if llm_invocation.span and start_time:
             start_time_ns = int(start_time * 1_000_000_000)
             try:
                 if hasattr(llm_invocation.span, "_start_time"):
-                    llm_invocation.span._start_time = start_time_ns  # type: ignore
+                    setattr(llm_invocation.span, "_start_time", start_time_ns)
             except Exception as e:
                 logger.warning(f"Failed to set span start time: {e}")
 
diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/test_edge_cases.py b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/test_edge_cases.py
index 54befef26..8f5604273 100644
--- a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/test_edge_cases.py
+++ b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/test_edge_cases.py
@@ -19,7 +19,10 @@ async def test_query_with_api_error(instrument, span_exporter):
         async for _ in query(prompt="", options=options):
             pass
     except Exception:
-        pass  # Expected to fail
+        # Expected to fail with empty prompt. This test verifies that instrumentation
+        # creates spans even when the SDK raises exceptions, ensuring telemetry
+        # doesn't break on edge cases.
+        pass
 
     # Get spans
     spans = span_exporter.get_finished_spans()
@@ -47,6 +50,8 @@ async def test_query_with_empty_prompt(instrument, span_exporter):
             if count > 5:  # Prevent infinite loop
                 break
     except Exception:
+        # Ignore exceptions here; this test only verifies that instrumentation
+        # can handle an empty prompt without crashing the test suite.
         pass
 
 
@@ -329,7 +334,9 @@ async def test_query_with_very_long_prompt(instrument, span_exporter):
             if count > 5:
                 break
     except Exception:
-        pass  # May fail due to token limits
+        # May fail due to token limits or rate limiting. This test verifies
+        # that instrumentation creates spans regardless of API errors.
+        pass
 
     # Should still create spans
     spans = span_exporter.get_finished_spans()
@@ -350,4 +357,6 @@ def test_patch_with_missing_module():
         instrumentor.instrument(tracer_provider=TracerProvider())
         instrumentor.uninstrument()
     except Exception:
-        pass  # Expected if SDK is not installed
+        # Expected if SDK is not installed or import fails. This test verifies
+        # graceful handling when the instrumented library is missing.
+        pass

From 0c74a8569531976012980e28e4b0a40760d87d46 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E6=B5=81=E5=B1=BF?= <caishipeng.csp@alibaba-inc.com>
Date: Mon, 26 Jan 2026 12:44:03 +0800
Subject: [PATCH 17/23] [fix] fix span attribute

Change-Id: I4bab45d35f385a2fe25af0d8e2456db187ec46d6
Co-developed-by: Cursor <noreply@cursor.com>
---
 .../claude_agent_sdk/context.py               |   53 -
 .../instrumentation/claude_agent_sdk/hooks.py |  271 ---
 .../instrumentation/claude_agent_sdk/patch.py |  424 ++--
 .../instrumentation/claude_agent_sdk/utils.py |   43 -
 .../tests/cassettes/test_echo_command.yaml    |  156 ++
 .../tests/cassettes/test_foo_sh_command.yaml  |  252 +++
 .../tests/cassettes/test_pretooluse_hook.yaml |  252 +++
 .../tests/conftest_cassettes.py               |   39 +
 .../tests/extract_cassettes.py                |   72 +
 .../tests/test_attributes.py                  |    1 -
 .../tests/test_edge_cases.py                  |  158 --
 .../tests/test_integration.py                 |   54 -
 .../tests/test_message_flow_cases.py          | 1939 +++++++++++++++++
 .../tests/test_mocks.py                       |   58 -
 .../tests/test_span_validation.py             |  414 ++++
 .../tests/test_task_tool.py                   |  247 +++
 .../tests/test_unit.py                        |   47 -
 .../tests/test_with_cassettes.py              |  313 +++
 18 files changed, 3937 insertions(+), 856 deletions(-)
 delete mode 100644 instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/instrumentation/claude_agent_sdk/context.py
 delete mode 100644 instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/instrumentation/claude_agent_sdk/hooks.py
 create mode 100644 instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/cassettes/test_echo_command.yaml
 create mode 100644 instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/cassettes/test_foo_sh_command.yaml
 create mode 100644 instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/cassettes/test_pretooluse_hook.yaml
 create mode 100644 instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/conftest_cassettes.py
 create mode 100644 instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/extract_cassettes.py
 create mode 100644 instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/test_message_flow_cases.py
 create mode 100644 instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/test_span_validation.py
 create mode 100644 instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/test_task_tool.py
 create mode 100644 instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/test_with_cassettes.py

diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/instrumentation/claude_agent_sdk/context.py b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/instrumentation/claude_agent_sdk/context.py
deleted file mode 100644
index 4b7d160fe..000000000
--- a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/instrumentation/claude_agent_sdk/context.py
+++ /dev/null
@@ -1,53 +0,0 @@
-# Copyright The OpenTelemetry Authors
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""
-Thread-local storage utilities for Claude Agent SDK tracing.
-
-This module provides thread-local storage for the parent invocation context,
-which is used by hooks to maintain trace context when async context
-propagation is broken (Claude's async event loop breaks OpenTelemetry context).
-"""
-
-import threading
-from typing import Any, Optional
-
-# Thread-local store for passing the parent invocation into hooks.
-# Claude's async event loop by default breaks OpenTelemetry context propagation.
-# The parent invocation is threaded via thread-local as a fallback.
-_thread_local = threading.local()
-
-
-def set_parent_invocation(invocation: Any) -> None:
-    """Set the parent agent invocation in thread-local storage.
-
-    Args:
-        invocation: InvokeAgentInvocation or ExecuteToolInvocation instance
-    """
-    _thread_local.parent_invocation = invocation
-
-
-def clear_parent_invocation() -> None:
-    """Clear the parent invocation from thread-local storage."""
-    if hasattr(_thread_local, "parent_invocation"):
-        delattr(_thread_local, "parent_invocation")
-
-
-def get_parent_invocation() -> Optional[Any]:
-    """Get the parent invocation from thread-local storage.
-
-    Returns:
-        Parent invocation or None if not set
-    """
-    return getattr(_thread_local, "parent_invocation", None)
diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/instrumentation/claude_agent_sdk/hooks.py b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/instrumentation/claude_agent_sdk/hooks.py
deleted file mode 100644
index 2e2fc07b1..000000000
--- a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/instrumentation/claude_agent_sdk/hooks.py
+++ /dev/null
@@ -1,271 +0,0 @@
-# Copyright The OpenTelemetry Authors
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import logging
-from typing import TYPE_CHECKING, Any, Dict, Optional, Tuple
-
-from opentelemetry import context as otel_context
-from opentelemetry.instrumentation.claude_agent_sdk.context import (
-    get_parent_invocation,
-)
-from opentelemetry.trace import set_span_in_context
-from opentelemetry.util.genai.extended_handler import (
-    get_extended_telemetry_handler,
-)
-from opentelemetry.util.genai.extended_types import ExecuteToolInvocation
-from opentelemetry.util.genai.types import Error
-
-if TYPE_CHECKING:
-    from claude_agent_sdk import (
-        HookContext,
-        HookInput,
-        HookJSONOutput,
-    )
-
-logger = logging.getLogger(__name__)
-
-# Storage for correlating PreToolUse and PostToolUse events
-# Key: tool_use_id, Value: (tool_invocation, handler)
-_active_tool_runs: Dict[str, Tuple[ExecuteToolInvocation, Any]] = {}
-
-# Storage for tool or subagent runs managed by client
-# Key: tool_use_id, Value: tool_invocation
-_client_managed_runs: Dict[str, ExecuteToolInvocation] = {}
-
-
-async def pre_tool_use_hook(
-    input_data: "HookInput",
-    tool_use_id: Optional[str],
-    context: "HookContext",
-) -> "HookJSONOutput":
-    """Trace tool execution before it starts.
-
-    This hook is called by Claude Agent SDK before executing a tool.
-    It creates an execute_tool span as a child of the current agent span.
-
-    Args:
-        input_data: Contains `tool_name`, `tool_input`, `session_id`
-        tool_use_id: Unique identifier for this tool invocation
-        context: Hook context (currently contains only signal)
-
-    Returns:
-        Hook output (empty dict allows execution to proceed)
-    """
-    if not tool_use_id:
-        return {}
-
-    # Skip if this tool run is already managed by the client
-    if tool_use_id in _client_managed_runs:
-        return {}
-
-    tool_name: str = str(input_data.get("tool_name", "unknown_tool"))
-    tool_input = input_data.get("tool_input", {})
-    session_id = input_data.get("session_id", "")
-
-    try:
-        handler = get_extended_telemetry_handler()
-        parent_invocation = get_parent_invocation()
-
-        if not parent_invocation:
-            return {}
-
-        # Create tool invocation following ExecuteToolInvocation semantic conventions
-        # Map to standard fields strictly, avoiding custom attributes
-        tool_invocation = ExecuteToolInvocation(
-            tool_name=tool_name,
-            tool_call_id=tool_use_id,
-            tool_call_arguments=tool_input,  # Standard field: tool call arguments
-            tool_description=tool_name,  # Use tool_name directly
-            attributes={
-                # Only include Claude Agent SDK-specific attributes that cannot map to standard fields
-                "tool.session_id": session_id,
-            }
-            if session_id
-            else {},
-        )
-
-        # Explicitly create tool span as child of parent invocation span
-        # This avoids relying on broken async context propagation
-        if parent_invocation and parent_invocation.span:
-            # Create child span in parent's context
-            ctx = set_span_in_context(parent_invocation.span)
-            token = otel_context.attach(ctx)
-
-            try:
-                handler.start_execute_tool(tool_invocation)
-            finally:
-                # Detach after starting span
-                try:
-                    otel_context.detach(token)
-                except Exception:
-                    # Ignore detach errors - context may already be invalid or detached.
-                    # Failing to detach here should not prevent tool execution from proceeding.
-                    pass
-        else:
-            # Fallback to auto-parenting (may not work due to broken context)
-            handler.start_execute_tool(tool_invocation)
-
-        _active_tool_runs[tool_use_id] = (tool_invocation, handler)
-
-    except Exception as e:
-        logger.warning(
-            f"Error in PreToolUse hook for {tool_name}: {e}", exc_info=True
-        )
-
-    return {}
-
-
-async def post_tool_use_hook(
-    input_data: "HookInput",
-    tool_use_id: Optional[str],
-    context: "HookContext",
-) -> "HookJSONOutput":
-    """Trace tool execution after it completes.
-
-    This hook is called by Claude Agent SDK after tool execution completes.
-    It ends the corresponding execute_tool span and records the result.
-
-    Args:
-        input_data: Contains `tool_name`, `tool_input`, `tool_response`, `session_id`, etc.
-        tool_use_id: Unique identifier for this tool invocation
-        context: Hook context (currently contains only signal)
-
-    Returns:
-        Hook output (empty dict by default)
-    """
-    if not tool_use_id:
-        return {}
-
-    tool_name: str = str(input_data.get("tool_name", "unknown_tool"))
-    tool_response = input_data.get("tool_response")
-
-    # Check if this is a client-managed run
-    client_invocation = _client_managed_runs.pop(tool_use_id, None)
-    if client_invocation:
-        # This run is managed by the client (subagent session or its tools)
-        try:
-            handler = get_extended_telemetry_handler()
-
-            # Set response (will be auto-formatted to gen_ai.tool.call.result by telemetry handler)
-            client_invocation.tool_call_result = tool_response
-
-            is_error = False
-            if isinstance(tool_response, dict):
-                is_error_value = tool_response.get("is_error")
-                is_error = is_error_value is True
-
-            if is_error:
-                error_msg = (
-                    str(tool_response)
-                    if tool_response
-                    else "Tool execution error"
-                )
-                handler.fail_execute_tool(
-                    client_invocation,
-                    Error(message=error_msg, type=RuntimeError),
-                )
-            else:
-                handler.stop_execute_tool(client_invocation)
-
-        except Exception as e:
-            logger.warning(
-                f"Failed to complete client-managed run: {e}", exc_info=True
-            )
-        return {}
-
-    try:
-        run_info = _active_tool_runs.pop(tool_use_id, None)
-        if not run_info:
-            return {}
-
-        tool_invocation, handler = run_info
-
-        # Set response (will be auto-formatted to gen_ai.tool.call.result by telemetry handler)
-        tool_invocation.tool_call_result = tool_response
-
-        is_error = False
-        if isinstance(tool_response, dict):
-            is_error_value = tool_response.get("is_error")
-            is_error = is_error_value is True
-
-        if is_error:
-            error_msg = (
-                str(tool_response) if tool_response else "Tool execution error"
-            )
-            handler.fail_execute_tool(
-                tool_invocation, Error(message=error_msg, type=RuntimeError)
-            )
-        else:
-            handler.stop_execute_tool(tool_invocation)
-
-    except Exception as e:
-        logger.warning(
-            f"Error in PostToolUse hook for {tool_name}: {e}", exc_info=True
-        )
-
-    return {}
-
-
-def clear_active_tool_runs() -> None:
-    """Clear all active tool runs.
-
-    This should be called when a conversation ends to avoid memory leaks
-    and to clean up any orphaned tool runs.
-    """
-    global _active_tool_runs, _client_managed_runs
-
-    try:
-        handler = get_extended_telemetry_handler()
-    except Exception:
-        # If we can't get the handler (e.g., instrumentation not initialized),
-        # we still need to clear the tracking dictionaries to prevent memory leaks.
-        # Without a handler, we can't properly end spans, so just clean up state.
-        _active_tool_runs.clear()
-        _client_managed_runs.clear()
-        return
-
-    # End any orphaned client-managed runs
-    for tool_use_id, tool_invocation in list(_client_managed_runs.items()):
-        try:
-            handler.fail_execute_tool(
-                tool_invocation,
-                Error(
-                    message="Client-managed run not completed (conversation ended)",
-                    type=RuntimeError,
-                ),
-            )
-        except Exception:
-            # Ignore errors when failing orphaned tools during cleanup.
-            # If the span is already ended or invalid, we don't want to crash.
-            # Best effort cleanup: continue processing remaining tools.
-            pass
-
-    # End any orphaned tool runs
-    for tool_use_id, (tool_invocation, _) in list(_active_tool_runs.items()):
-        try:
-            handler.fail_execute_tool(
-                tool_invocation,
-                Error(
-                    message="Tool run not completed (conversation ended)",
-                    type=RuntimeError,
-                ),
-            )
-        except Exception:
-            # Ignore errors when failing orphaned tools during cleanup.
-            # If the span is already ended or invalid, we don't want to crash.
-            # Best effort cleanup: continue processing remaining tools.
-            pass
-
-    _active_tool_runs.clear()
-    _client_managed_runs.clear()
diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/instrumentation/claude_agent_sdk/patch.py b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/instrumentation/claude_agent_sdk/patch.py
index 3f007df42..0f4e94dc0 100644
--- a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/instrumentation/claude_agent_sdk/patch.py
+++ b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/instrumentation/claude_agent_sdk/patch.py
@@ -18,26 +18,17 @@
 import time
 from typing import Any, Dict, List, Optional
 
-from claude_agent_sdk import HookMatcher
-from claude_agent_sdk.types import ClaudeAgentOptions
-
 from opentelemetry import context as otel_context
-from opentelemetry.instrumentation.claude_agent_sdk.context import (
-    clear_parent_invocation,
-    set_parent_invocation,
-)
-from opentelemetry.instrumentation.claude_agent_sdk.hooks import (
-    _client_managed_runs,
-    clear_active_tool_runs,
-    post_tool_use_hook,
-    pre_tool_use_hook,
-)
+from opentelemetry.trace import set_span_in_context
 from opentelemetry.instrumentation.claude_agent_sdk.utils import (
     extract_usage_from_result_message,
     get_model_from_options_or_env,
     infer_provider_from_base_url,
 )
-from opentelemetry.util.genai.extended_handler import ExtendedTelemetryHandler
+from opentelemetry.util.genai.extended_handler import (
+    ExtendedTelemetryHandler,
+    get_extended_telemetry_handler,
+)
 from opentelemetry.util.genai.extended_types import (
     ExecuteToolInvocation,
     InvokeAgentInvocation,
@@ -46,13 +37,54 @@
     Error,
     InputMessage,
     LLMInvocation,
+    MessagePart,
     OutputMessage,
     Text,
     ToolCall,
+    ToolCallResponse,
 )
 
 logger = logging.getLogger(__name__)
 
+# Storage for tool runs managed by client (created from response stream)
+# Key: tool_use_id, Value: tool_invocation
+_client_managed_runs: Dict[str, ExecuteToolInvocation] = {}
+
+
+def _clear_client_managed_runs() -> None:
+    """Clear all client-managed tool runs.
+    
+    This should be called when a conversation ends to avoid memory leaks
+    and to clean up any orphaned tool runs.
+    """
+    global _client_managed_runs
+
+    try:
+        handler = get_extended_telemetry_handler()
+    except Exception:
+        # If we can't get the handler (e.g., instrumentation not initialized),
+        # we still need to clear the tracking dictionary to prevent memory leaks.
+        _client_managed_runs.clear()
+        return
+
+    # End any orphaned tool runs
+    for tool_use_id, tool_invocation in list(_client_managed_runs.items()):
+        try:
+            handler.fail_execute_tool(
+                tool_invocation,
+                Error(
+                    message="Tool run not completed (conversation ended)",
+                    type=RuntimeError,
+                ),
+            )
+        except Exception:
+            # Ignore errors when failing orphaned tools during cleanup.
+            # If the span is already ended or invalid, we don't want to crash.
+            # Best effort cleanup: continue processing remaining tools.
+            pass
+
+    _client_managed_runs.clear()
+
 
 def _extract_message_parts(msg: Any) -> List[Any]:
     """Extract parts (text + tool calls) from an AssistantMessage."""
@@ -78,82 +110,67 @@ def _extract_message_parts(msg: Any) -> List[Any]:
 def _create_tool_spans_from_message(
     msg: Any,
     handler: ExtendedTelemetryHandler,
+    agent_invocation: InvokeAgentInvocation,
+    active_task_stack: List[Any],
     exclude_tool_names: Optional[List[str]] = None,
 ) -> None:
-    """Create tool execution spans from ToolUseBlocks in an AssistantMessage."""
+    """Create tool execution spans from ToolUseBlocks in an AssistantMessage.
+    
+    Tool spans are children of the active Task span (if any), otherwise agent span.
+    When a Task tool is created, it's pushed onto active_task_stack.
+    """
     if not hasattr(msg, "content"):
         return
 
     exclude_tool_names = exclude_tool_names or []
 
-    for block in msg.content:
-        if type(block).__name__ != "ToolUseBlock":
-            continue
-
-        tool_use_id = getattr(block, "id", None)
-        tool_name = getattr(block, "name", "unknown_tool")
-        tool_input = getattr(block, "input", {})
-
-        if not tool_use_id or tool_name in exclude_tool_names:
-            continue
-
+    # Determine parent span: use active Task span if exists, otherwise agent span
+    parent_span = active_task_stack[-1].span if active_task_stack else agent_invocation.span
+    
+    parent_context_token = None
+    if parent_span:
         try:
-            tool_invocation = ExecuteToolInvocation(
-                tool_name=tool_name,
-                tool_call_id=tool_use_id,
-                tool_call_arguments=tool_input,
-                tool_description=tool_name,
+            parent_context_token = otel_context.attach(
+                set_span_in_context(parent_span)
             )
-            handler.start_execute_tool(tool_invocation)
-            _client_managed_runs[tool_use_id] = tool_invocation
-
-        except Exception as e:
-            logger.warning(f"Failed to create tool span for {tool_name}: {e}")
-
-
-def _close_tool_spans_from_message(
-    msg: Any,
-    handler: ExtendedTelemetryHandler,
-) -> List[str]:
-    """Close tool execution spans from ToolResultBlocks in a UserMessage."""
-    user_text_parts = []
+        except Exception:
+            pass
 
-    if not hasattr(msg, "content"):
-        return user_text_parts
-
-    for block in msg.content:
-        block_type = type(block).__name__
-
-        if block_type == "ToolResultBlock":
-            tool_use_id = getattr(block, "tool_use_id", None)
-            if tool_use_id and tool_use_id in _client_managed_runs:
-                tool_invocation = _client_managed_runs.pop(tool_use_id)
-
-                # Set tool response
-                tool_content = getattr(block, "content", None)
-                is_error_value = getattr(block, "is_error", None)
-                is_error = is_error_value is True
+    try:
+        for block in msg.content:
+            if type(block).__name__ != "ToolUseBlock":
+                continue
+            
+            tool_use_id = getattr(block, "id", None)
+            tool_name = getattr(block, "name", "unknown_tool")
+            tool_input = getattr(block, "input", {})
 
-                tool_invocation.tool_call_result = tool_content
+            if not tool_use_id or tool_name in exclude_tool_names:
+                continue
 
-                # Complete tool span
-                if is_error:
-                    error_msg = (
-                        str(tool_content)
-                        if tool_content
-                        else "Tool execution error"
-                    )
-                    handler.fail_execute_tool(
-                        tool_invocation,
-                        Error(message=error_msg, type=RuntimeError),
-                    )
-                else:
-                    handler.stop_execute_tool(tool_invocation)
-
-        elif block_type == "TextBlock":
-            user_text_parts.append(getattr(block, "text", ""))
+            try:
+                tool_invocation = ExecuteToolInvocation(
+                    tool_name=tool_name,
+                    tool_call_id=tool_use_id,
+                    tool_call_arguments=tool_input,
+                    tool_description=tool_name,
+                )
+                handler.start_execute_tool(tool_invocation)
+                _client_managed_runs[tool_use_id] = tool_invocation
+                
+                # If this is a Task tool, push it onto the stack
+                if tool_name == "Task":
+                    active_task_stack.append(tool_invocation)
+                    logger.debug(f"Task span created and pushed: {tool_use_id}, stack depth: {len(active_task_stack)}")
 
-    return user_text_parts
+            except Exception as e:
+                logger.warning(f"Failed to create tool span for {tool_name}: {e}")
+    finally:
+        if parent_context_token is not None:
+            try:
+                otel_context.detach(parent_context_token)
+            except Exception:
+                pass
 
 
 def _update_token_usage(
@@ -186,13 +203,20 @@ def _process_assistant_message(
     turn_tracker: "AssistantTurnTracker",
     handler: ExtendedTelemetryHandler,
     collected_messages: List[Dict[str, Any]],
+    active_task_stack: List[Any],
 ) -> None:
     """Process AssistantMessage: create LLM turn, extract parts, create tool spans."""
     parts = _extract_message_parts(msg)
     has_text_content = any(isinstance(p, Text) for p in parts)
+    has_tool_calls = any(isinstance(p, ToolCall) for p in parts)
+    
+    # Check if we're inside a Task
+    is_inside_task = len(active_task_stack) > 0
 
     if has_text_content:
-        # This is the start of a new LLM response (with text content)
+        if turn_tracker.current_llm_invocation:
+            turn_tracker.close_llm_turn()
+        
         message_arrival_time = time.time()
 
         turn_tracker.start_llm_turn(
@@ -207,29 +231,47 @@ def _process_assistant_message(
         if parts:
             turn_tracker.add_assistant_output(parts)
             output_msg = OutputMessage(
-                role="assistant", parts=parts, finish_reason="stop"
+                role="assistant", parts=list(parts), finish_reason="stop"
             )
             agent_invocation.output_messages.append(output_msg)
-
-            text_parts = [p.content for p in parts if isinstance(p, Text)]
-            if text_parts:
+            
+            # Only add to collected_messages if not inside a Task
+            if not is_inside_task:
                 collected_messages.append(
-                    {"role": "assistant", "content": " ".join(text_parts)}
+                    {"role": "assistant", "parts": list(parts)}
                 )
 
-    else:
-        # This is a tool-only message, part of the current LLM turn
-        # Append it to the current LLM invocation's output
+    elif has_tool_calls:
         if parts and turn_tracker.current_llm_invocation:
-            turn_tracker.add_assistant_output(parts)
-            output_msg = OutputMessage(
-                role="assistant", parts=parts, finish_reason="stop"
-            )
-            agent_invocation.output_messages.append(output_msg)
+            if turn_tracker.current_llm_invocation.output_messages:
+                last_output_msg = turn_tracker.current_llm_invocation.output_messages[-1]
+                last_output_msg.parts.extend(parts)
+            else:
+                turn_tracker.add_assistant_output(parts)
+
+        # Only add to collected_messages if not inside a Task
+        if not is_inside_task:
+            if parts and collected_messages:
+                last_msg = collected_messages[-1]
+                if (last_msg.get("role") == "assistant" and 
+                    turn_tracker.current_llm_invocation):
+                    last_parts = last_msg.get("parts", [])
+                    last_parts.extend(parts)
+                    last_msg["parts"] = last_parts
+                else:
+                    collected_messages.append(
+                        {"role": "assistant", "parts": list(parts)}
+                    )
+            elif parts:
+                collected_messages.append(
+                    {"role": "assistant", "parts": list(parts)}
+                )
 
+    # Close LLM turn before creating tool spans to ensure correct timeline
+    if has_tool_calls and turn_tracker.current_llm_invocation:
         turn_tracker.close_llm_turn()
 
-    _create_tool_spans_from_message(msg, handler)
+    _create_tool_spans_from_message(msg, handler, agent_invocation, active_task_stack)
 
 
 def _process_user_message(
@@ -237,14 +279,85 @@ def _process_user_message(
     turn_tracker: "AssistantTurnTracker",
     handler: ExtendedTelemetryHandler,
     collected_messages: List[Dict[str, Any]],
+    active_task_stack: List[Any],
 ) -> None:
     """Process UserMessage: close tool spans, collect message content, mark next LLM start."""
-    user_text_parts = _close_tool_spans_from_message(msg, handler)
-
-    if user_text_parts:
-        user_content = " ".join(user_text_parts)
-        collected_messages.append({"role": "user", "content": user_content})
-
+    user_parts: List[MessagePart] = []
+    tool_parts: List[MessagePart] = []
+    
+    # Check if we're inside a Task
+    is_inside_task = len(active_task_stack) > 0
+    
+    if hasattr(msg, "content"):
+        for block in msg.content:
+            block_type = type(block).__name__
+            
+            if block_type == "ToolResultBlock":
+                tool_use_id = getattr(block, "tool_use_id", None)
+                if tool_use_id and tool_use_id in _client_managed_runs:
+                    tool_invocation = _client_managed_runs.pop(tool_use_id)
+
+                    # Set tool response
+                    tool_content = getattr(block, "content", None)
+                    is_error_value = getattr(block, "is_error", None)
+                    is_error = is_error_value is True
+
+                    tool_invocation.tool_call_result = tool_content
+
+                    if is_error:
+                        error_msg = (
+                            str(tool_content)
+                            if tool_content
+                            else "Tool execution error"
+                        )
+                        handler.fail_execute_tool(
+                            tool_invocation,
+                            Error(message=error_msg, type=RuntimeError),
+                        )
+                    else:
+                        handler.stop_execute_tool(tool_invocation)
+                    
+                    # Check if this is a Task tool result - if so, pop from stack
+                    # BEFORE we check is_inside_task for message filtering
+                    is_task_result = active_task_stack and active_task_stack[-1].tool_call_id == tool_use_id
+                    if is_task_result:
+                        active_task_stack.pop()
+                        logger.debug(f"Task span closed and popped: {tool_use_id}, stack depth: {len(active_task_stack)}")
+                
+                if tool_use_id:
+                    tool_parts.append(
+                        ToolCallResponse(
+                            id=tool_use_id,
+                            response=tool_content if tool_content else "",
+                        )
+                    )
+            
+            elif block_type == "TextBlock":
+                text_content = getattr(block, "text", "")
+                if text_content:
+                    user_parts.append(Text(content=text_content))
+
+    # Re-check if we're inside a Task AFTER popping Task results
+    # This ensures Task tool results are NOT filtered out
+    is_inside_task = len(active_task_stack) > 0
+    
+    # Only add to collected_messages if not inside a Task
+    if not is_inside_task:
+        if user_parts:
+            collected_messages.append({"role": "user", "parts": user_parts})
+        
+        if tool_parts:
+            if collected_messages:
+                last_msg = collected_messages[-1]
+                if (last_msg.get("role") == "tool" and 
+                    turn_tracker.current_llm_invocation):
+                    last_parts = last_msg.get("parts", [])
+                    last_parts.extend(tool_parts)
+                    last_msg["parts"] = last_parts
+                else:
+                    collected_messages.append({"role": "tool", "parts": tool_parts})
+            else:
+                collected_messages.append({"role": "tool", "parts": tool_parts})    
     # Always mark next LLM start when UserMessage arrives
     turn_tracker.mark_next_llm_start()
 
@@ -254,15 +367,14 @@ def _process_result_message(
     agent_invocation: InvokeAgentInvocation,
     turn_tracker: "AssistantTurnTracker",
 ) -> None:
-    """Process ResultMessage: update session_id and token usage."""
+    """Process ResultMessage: update session_id, token usage, and close any open LLM turn."""
     if hasattr(msg, "session_id") and msg.session_id:
         agent_invocation.conversation_id = msg.session_id
-        if agent_invocation.span:
-            agent_invocation.span.set_attribute(
-                "gen_ai.conversation.id", msg.session_id
-            )
 
     _update_token_usage(agent_invocation, turn_tracker, msg)
+    
+    if turn_tracker.current_llm_invocation:
+        turn_tracker.close_llm_turn()
 
 
 async def _process_agent_invocation_stream(
@@ -288,10 +400,11 @@ async def _process_agent_invocation_stream(
         else [],
     )
 
-    # Clear context to create a new root trace for each independent query
-    otel_context.attach(otel_context.Context())
+    # Attach empty context to clear any previous context, ensuring each query
+    # creates an independent root trace. This is important for scenarios where
+    # multiple queries are called in the same script - each should have its own trace_id.
+    empty_context_token = otel_context.attach(otel_context.Context())
     handler.start_invoke_agent(agent_invocation)
-    set_parent_invocation(agent_invocation)
 
     query_start_time = time.time()
     turn_tracker = AssistantTurnTracker(
@@ -299,6 +412,11 @@ async def _process_agent_invocation_stream(
     )
 
     collected_messages: List[Dict[str, Any]] = []
+    
+    # Stack to track active Task tool invocations
+    # When a Task tool is created, it's pushed here
+    # When its ToolResultBlock is received, it's popped
+    active_task_stack: List[Any] = []
 
     try:
         async for msg in wrapped_stream:
@@ -313,6 +431,7 @@ async def _process_agent_invocation_stream(
                     turn_tracker,
                     handler,
                     collected_messages,
+                    active_task_stack,
                 )
             elif msg_type == "UserMessage":
                 _process_user_message(
@@ -320,17 +439,16 @@ async def _process_agent_invocation_stream(
                     turn_tracker,
                     handler,
                     collected_messages,
+                    active_task_stack,
                 )
             elif msg_type == "ResultMessage":
                 _process_result_message(msg, agent_invocation, turn_tracker)
 
             yield msg
 
-        # Handle successful completion
         handler.stop_invoke_agent(agent_invocation)
 
     except Exception as e:
-        # Handle error
         error_msg = str(e)
         if agent_invocation.span:
             agent_invocation.span.set_attribute("error.type", type(e).__name__)
@@ -341,10 +459,18 @@ async def _process_agent_invocation_stream(
 
         raise
     finally:
-        # Cleanup
         turn_tracker.close()
-        clear_active_tool_runs()
-        clear_parent_invocation()
+        
+        # Clean up any remaining Task spans in stack (shouldn't happen in normal flow)
+        while active_task_stack:
+            task_invocation = active_task_stack.pop()
+            logger.warning(f"Unclosed Task span at end of invocation: {task_invocation.tool_call_id}")
+        
+        # Detach empty context token to restore the original context.
+        # Note: stop_invoke_agent/fail_invoke_agent already detached invocation.context_token,
+        # which restored to empty context. Now we detach empty_context_token to restore further.
+        otel_context.detach(empty_context_token)
+        _clear_client_managed_runs()
 
 
 class AssistantTurnTracker:
@@ -388,7 +514,6 @@ def start_llm_turn(
 
         self.next_llm_start_time = None
 
-        # Build input_messages from prompt + collected messages
         input_messages = []
 
         if prompt:
@@ -398,11 +523,19 @@ def start_llm_turn(
 
         for hist_msg in collected_messages:
             role = hist_msg.get("role", "user")
-            content = hist_msg.get("content", "")
-            if isinstance(content, str) and content:
-                input_messages.append(
-                    InputMessage(role=role, parts=[Text(content=content)])
-                )
+            
+            if "parts" in hist_msg:
+                parts = hist_msg["parts"]
+                if parts:
+                    input_messages.append(
+                        InputMessage(role=role, parts=parts)
+                    )
+            elif "content" in hist_msg:
+                content = hist_msg["content"]
+                if isinstance(content, str) and content:
+                    input_messages.append(
+                        InputMessage(role=role, parts=[Text(content=content)])
+                    )
 
         llm_invocation = LLMInvocation(
             provider=provider,
@@ -411,14 +544,7 @@ def start_llm_turn(
         )
 
         self.handler.start_llm(llm_invocation)
-        # Override span start time.
-        # TODO(telemetry): Avoid relying on the private `_start_time` attribute.
-        # The long-term fix is to plumb a public `start_time` parameter through
-        # ExtendedTelemetryHandler.start_llm and the underlying span creation,
-        # so the desired start time can be set via a supported API instead of
-        # mutating internal span state here. Until that is available, we perform
-        # a best-effort adjustment guarded by hasattr and try/except so that
-        # failures do not break tracing.
+        # TODO(telemetry): Use public API for setting span start time
         if llm_invocation.span and start_time:
             start_time_ns = int(start_time * 1_000_000_000)
             try:
@@ -436,14 +562,10 @@ def add_assistant_output(self, parts: List[Any]) -> None:
             return
 
         output_msg = OutputMessage(
-            role="assistant", parts=parts, finish_reason="stop"
+            role="assistant", parts=list(parts), finish_reason="stop"
         )
         self.current_llm_invocation.output_messages.append(output_msg)
 
-    def add_user_message(self, content: str) -> None:
-        """Mark next LLM start time."""
-        self.mark_next_llm_start()
-
     def mark_next_llm_start(self) -> None:
         """Mark the start time for the next LLM invocation."""
         self.next_llm_start_time = time.time()
@@ -477,42 +599,12 @@ def close(self) -> None:
             self.current_llm_invocation = None
 
 
-def _inject_tracing_hooks(options: Any) -> None:
-    """Inject OpenTelemetry tracing hooks into ClaudeAgentOptions."""
-    if not hasattr(options, "hooks"):
-        return
-
-    if options.hooks is None:
-        options.hooks = {}
-
-    if "PreToolUse" not in options.hooks:
-        options.hooks["PreToolUse"] = []
-
-    if "PostToolUse" not in options.hooks:
-        options.hooks["PostToolUse"] = []
-
-    try:
-        otel_pre_matcher = HookMatcher(matcher=None, hooks=[pre_tool_use_hook])
-        otel_post_matcher = HookMatcher(
-            matcher=None, hooks=[post_tool_use_hook]
-        )
-
-        options.hooks["PreToolUse"].insert(0, otel_pre_matcher)
-        options.hooks["PostToolUse"].insert(0, otel_post_matcher)
-    except Exception as e:
-        logger.warning(f"Failed to inject tracing hooks: {e}")
-
-
 def wrap_claude_client_init(wrapped, instance, args, kwargs, handler=None):
     """Wrapper for ClaudeSDKClient.__init__ to inject tracing hooks."""
     if handler is None:
         logger.warning("Handler not provided, skipping instrumentation")
         return wrapped(*args, **kwargs)
 
-    options = kwargs.get("options") or (args[0] if args else None)
-    if options:
-        _inject_tracing_hooks(options)
-
     result = wrapped(*args, **kwargs)
 
     instance._otel_handler = handler
@@ -547,7 +639,7 @@ async def wrap_claude_client_receive_response(
     prompt = getattr(instance, "_otel_prompt", "") or ""
     model = "unknown"
     if hasattr(instance, "options") and instance.options:
-        model = getattr(instance.options, "model", "unknown")
+        model = get_model_from_options_or_env(instance.options)
 
     async for msg in _process_agent_invocation_stream(
         wrapped(*args, **kwargs),
@@ -569,16 +661,6 @@ async def wrap_query(wrapped, instance, args, kwargs, handler=None):
     prompt = kwargs.get("prompt") or (args[0] if args else "")
     options = kwargs.get("options")
 
-    if options:
-        _inject_tracing_hooks(options)
-    elif options is None:
-        try:
-            options = ClaudeAgentOptions()
-            _inject_tracing_hooks(options)
-            kwargs["options"] = options
-        except Exception as e:
-            logger.warning(f"Failed to create ClaudeAgentOptions: {e}")
-
     model = get_model_from_options_or_env(options)
     prompt_str = str(prompt) if isinstance(prompt, str) else ""
 
diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/instrumentation/claude_agent_sdk/utils.py b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/instrumentation/claude_agent_sdk/utils.py
index a66683995..1ca2f916f 100644
--- a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/instrumentation/claude_agent_sdk/utils.py
+++ b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/instrumentation/claude_agent_sdk/utils.py
@@ -196,46 +196,3 @@ def extract_usage_from_result_message(message: Any) -> Dict[str, Any]:
         return {}
     metrics = extract_usage_metadata(message.usage)
     return sum_anthropic_tokens(metrics) if metrics else {}
-
-
-def truncate_value(value: Any, max_length: int = 150) -> str:
-    """
-    Truncate a value for display.
-
-    - For strings: truncate with ellipsis
-    - For lists: show first few items
-    - For dicts: show truncated version
-    - For other types: convert to string
-    """
-    if isinstance(value, str):
-        if len(value) <= max_length:
-            return value
-        return value[:max_length] + "..."
-
-    if isinstance(value, list):
-        if len(value) == 0:
-            return "[]"
-        if len(value) <= 3:
-            items_str = ", ".join(
-                truncate_value(item, max_length // 3) for item in value
-            )
-            if len(items_str) <= max_length:
-                return f"[{items_str}]"
-        first_items = ", ".join(
-            truncate_value(item, max_length // 4) for item in value[:2]
-        )
-        return f"[{first_items}, ... ({len(value)} items)]"
-
-    if isinstance(value, dict):
-        if len(value) == 0:
-            return "{}"
-        items = []
-        for i, (k, v) in enumerate(value.items()):
-            if i >= 2:
-                items.append(f"... ({len(value)} keys)")
-                break
-            v_str = truncate_value(v, max_length // 3)
-            items.append(f"{k}: {v_str}")
-        return "{" + ", ".join(items) + "}"
-
-    return str(value)
diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/cassettes/test_echo_command.yaml b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/cassettes/test_echo_command.yaml
new file mode 100644
index 000000000..3dd068871
--- /dev/null
+++ b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/cassettes/test_echo_command.yaml
@@ -0,0 +1,156 @@
+description: 'User query: echo ''Hello from hooks example!'''
+prompt: echo 'Hello from hooks example!'
+messages:
+- type: SystemMessage
+  subtype: init
+  data:
+    type: system
+    subtype: init
+    cwd: /Users/sipercai/project/pyins/extend/claude-agent-sdk-python/examples
+    session_id: cf630075-3251-4f84-a57d-4274afed0c46
+    tools:
+    - Task
+    - TaskOutput
+    - Bash
+    - Glob
+    - Grep
+    - ExitPlanMode
+    - Read
+    - Edit
+    - Write
+    - NotebookEdit
+    - WebFetch
+    - TodoWrite
+    - WebSearch
+    - KillShell
+    - AskUserQuestion
+    - Skill
+    - EnterPlanMode
+    mcp_servers: []
+    model: qwen-plus
+    permissionMode: default
+    slash_commands:
+    - compact
+    - context
+    - cost
+    - init
+    - pr-comments
+    - release-notes
+    - review
+    - security-review
+    apiKeySource: ANTHROPIC_API_KEY
+    claude_code_version: 2.1.1
+    output_style: default
+    agents:
+    - Bash
+    - general-purpose
+    - statusline-setup
+    - Explore
+    - Plan
+    skills: []
+    plugins: []
+    uuid: 8dd3b79c-8b8c-4649-8e4d-7eaa01374f1f
+- type: AssistantMessage
+  model: qwen-plus
+  content:
+  - type: TextBlock
+    text: 'I''ll run the bash command `echo ''Hello from hooks example!''` for you.
+
+
+
+      '
+  parent_tool_use_id: null
+  error: null
+- type: AssistantMessage
+  model: qwen-plus
+  content:
+  - type: ToolUseBlock
+    id: call_601533b186c2423dad58e8
+    name: Bash
+    input:
+      command: echo 'Hello from hooks example!'
+      description: Print hello message from hooks example
+  parent_tool_use_id: null
+  error: null
+- type: UserMessage
+  content:
+  - type: ToolResultBlock
+    tool_use_id: call_601533b186c2423dad58e8
+    content: Hello from hooks example\!
+    is_error: false
+  uuid: a043c8a7-9b34-46db-adbe-d376748df781
+  parent_tool_use_id: null
+- type: AssistantMessage
+  model: qwen-plus
+  content:
+  - type: TextBlock
+    text: 'The command executed successfully and printed: "Hello from hooks example!"
+
+
+      Note: The exclamation mark appears to have been escaped in the output as `\!`, which is normal behavior in some shell
+      contexts, but the message was displayed correctly.
+
+
+      Is there anything else you''d like me to help you with?'
+  parent_tool_use_id: null
+  error: null
+- type: ResultMessage
+  subtype: success
+  duration_ms: 5280
+  duration_api_ms: 16331
+  is_error: false
+  num_turns: 2
+  session_id: cf630075-3251-4f84-a57d-4274afed0c46
+  total_cost_usd: 0.221802
+  usage:
+    input_tokens: 30297
+    cache_creation_input_tokens: 0
+    cache_read_input_tokens: 0
+    output_tokens: 114
+    server_tool_use:
+      web_search_requests: 0
+      web_fetch_requests: 0
+    service_tier: standard
+    cache_creation:
+      ephemeral_1h_input_tokens: 0
+      ephemeral_5m_input_tokens: 0
+  result: 'The command executed successfully and printed: "Hello from hooks example!"...'
+  structured_output: null
+expected_spans:
+- name_pattern: invoke_agent
+  operation: invoke_agent
+  parent: null
+  attributes:
+    gen_ai.operation.name: invoke_agent
+    gen_ai.agent.name: claude-agent
+    gen_ai.conversation.id: cf630075-3251-4f84-a57d-4274afed0c46
+    gen_ai.usage.input_tokens: 30297
+    gen_ai.usage.output_tokens: 114
+  attributes_contains:
+    gen_ai.input.messages: echo 'Hello from hooks example!'
+- name_pattern: chat
+  operation: chat
+  parent: invoke_agent
+  attributes:
+    gen_ai.operation.name: chat
+    gen_ai.request.model: qwen-plus
+  attributes_contains:
+    gen_ai.input.messages: echo 'Hello from hooks example!'
+- name_pattern: execute_tool
+  operation: execute_tool
+  parent: invoke_agent
+  attributes:
+    gen_ai.operation.name: execute_tool
+    gen_ai.tool.name: Bash
+    gen_ai.tool.call.id: call_601533b186c2423dad58e8
+  attributes_contains:
+    gen_ai.tool.call.arguments: echo 'Hello from hooks example!'
+  is_error: false
+- name_pattern: chat
+  operation: chat
+  parent: invoke_agent
+  attributes:
+    gen_ai.operation.name: chat
+    gen_ai.request.model: qwen-plus
+  attributes_contains:
+    gen_ai.input.messages: echo 'Hello from hooks example!'
diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/cassettes/test_foo_sh_command.yaml b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/cassettes/test_foo_sh_command.yaml
new file mode 100644
index 000000000..e270b56b0
--- /dev/null
+++ b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/cassettes/test_foo_sh_command.yaml
@@ -0,0 +1,252 @@
+description: 'User query: ./foo.sh --help (被阻止的命令)'
+prompt: ./foo.sh --help
+messages:
+- type: SystemMessage
+  subtype: init
+  data:
+    type: system
+    subtype: init
+    cwd: /Users/sipercai/project/pyins/extend/claude-agent-sdk-python/examples
+    session_id: cf630075-3251-4f84-a57d-4274afed0c46
+    tools:
+    - Task
+    - TaskOutput
+    - Bash
+    - Glob
+    - Grep
+    - ExitPlanMode
+    - Read
+    - Edit
+    - Write
+    - NotebookEdit
+    - WebFetch
+    - TodoWrite
+    - WebSearch
+    - KillShell
+    - AskUserQuestion
+    - Skill
+    - EnterPlanMode
+    mcp_servers: []
+    model: qwen-plus
+    permissionMode: default
+    slash_commands:
+    - compact
+    - context
+    - cost
+    - init
+    - pr-comments
+    - release-notes
+    - review
+    - security-review
+    apiKeySource: ANTHROPIC_API_KEY
+    claude_code_version: 2.1.1
+    output_style: default
+    agents:
+    - Bash
+    - general-purpose
+    - statusline-setup
+    - Explore
+    - Plan
+    skills: []
+    plugins: []
+    uuid: cf949b1a-3a2e-4135-bcd8-5ae6a9c6ae1d
+- type: AssistantMessage
+  model: qwen-plus
+  content:
+  - type: TextBlock
+    text: 'I''ll run the bash command `./foo.sh --help` for you.
+
+
+      '
+  parent_tool_use_id: null
+  error: null
+- type: AssistantMessage
+  model: qwen-plus
+  content:
+  - type: ToolUseBlock
+    id: call_33776c3031c148f6a02f71
+    name: Bash
+    input:
+      command: ./foo.sh --help
+      description: Run foo.sh script with help flag
+  parent_tool_use_id: null
+  error: null
+- type: UserMessage
+  content:
+  - type: ToolResultBlock
+    tool_use_id: call_33776c3031c148f6a02f71
+    content: 'Command contains invalid pattern: foo.sh'
+    is_error: true
+  uuid: 8173b335-1fa8-45d0-a65d-5d901d69a8c8
+  parent_tool_use_id: null
+- type: AssistantMessage
+  model: qwen-plus
+  content:
+  - type: TextBlock
+    text: 'It looks like the `foo.sh` script doesn''t exist in the current directory or isn''t accessible. Let me check what
+      files are available in the current directory first to see what scripts we have:
+
+
+      '
+  parent_tool_use_id: null
+  error: null
+- type: AssistantMessage
+  model: qwen-plus
+  content:
+  - type: ToolUseBlock
+    id: call_2066d7694fd9488096cd3e
+    name: Bash
+    input:
+      command: ls -la
+      description: List all files in current directory
+  parent_tool_use_id: null
+  error: null
+- type: UserMessage
+  content:
+  - type: ToolResultBlock
+    tool_use_id: call_2066d7694fd9488096cd3e
+    content: 'total 432
+
+      drwxr-xr-x@ 41 sipercai  staff   1312 Jan 23 12:33 .
+
+      drwxr-xr-x@ 23 sipercai  staff    736 Jan 12 18:02 ..
+
+      drwxr-xr-x@ 14 sipercai  staff    448 Jan  9 16:12 __pycache__
+
+      -rw-r--r--@  1 sipercai  staff   4708 Dec 25 17:18 agents.py
+
+      -rw-r--r--@  1 sipercai  staff   2189 Jan 23 12:34 code_reviewer_example.py
+
+      ...'
+    is_error: false
+  uuid: 9d0f6d9c-2715-47b1-9a5c-86237cd935ab
+  parent_tool_use_id: null
+- type: AssistantMessage
+  model: qwen-plus
+  content:
+  - type: TextBlock
+    text: "I can see that there's no `foo.sh` script in the current directory. Looking at the files listed, I can see several\
+      \ Python scripts (`.py` files) and one text file (`hello.txt`). \n\nThe closest executable file I can see is `pretooluse_example.py`\
+      \ which has execute permissions (`-rwxr-xr-x`), but it's a Python script, not a shell script.\n\nWould you like me to:\n\
+      - Check if there are any other shell scripts (`.sh` files) in subdirectories?\n- Run the `--help` command on one of\
+      \ the Python scripts instead (like `python pretooluse_example.py --help`)?\n- Or did you mean to run a different command\
+      \ or script?\n\nLet me know what you'd like to do next!"
+  parent_tool_use_id: null
+  error: null
+- type: ResultMessage
+  subtype: success
+  duration_ms: 11929
+  duration_api_ms: 11297
+  is_error: false
+  num_turns: 3
+  session_id: cf630075-3251-4f84-a57d-4274afed0c46
+  total_cost_usd: 0.129201
+  usage:
+    input_tokens: 41647
+    cache_creation_input_tokens: 0
+    cache_read_input_tokens: 0
+    output_tokens: 284
+    server_tool_use:
+      web_search_requests: 0
+      web_fetch_requests: 0
+    service_tier: standard
+    cache_creation:
+      ephemeral_1h_input_tokens: 0
+      ephemeral_5m_input_tokens: 0
+  result: I can see that there's no `foo.sh` script in the current directory...
+  structured_output: null
+expected_spans:
+- name_pattern: invoke_agent
+  operation: invoke_agent
+  parent: null
+  attributes:
+    gen_ai.operation.name: invoke_agent
+    gen_ai.agent.name: claude-agent
+    gen_ai.conversation.id: cf630075-3251-4f84-a57d-4274afed0c46
+    gen_ai.usage.input_tokens: 41647
+    gen_ai.usage.output_tokens: 284
+  attributes_contains:
+    gen_ai.input.messages: ./foo.sh --help
+- name_pattern: chat
+  operation: chat
+  parent: invoke_agent
+  attributes:
+    gen_ai.operation.name: chat
+    gen_ai.request.model: qwen-plus
+  attributes_contains:
+    gen_ai.input.messages: ./foo.sh --help
+    gen_ai.output.messages:
+    - I'll run the bash command
+    - tool_call
+    gen_ai.response.finish_reasons:
+    - stop
+- name_pattern: execute_tool
+  operation: execute_tool
+  parent: invoke_agent
+  attributes:
+    gen_ai.operation.name: execute_tool
+    gen_ai.tool.name: Bash
+    gen_ai.tool.call.id: call_33776c3031c148f6a02f71
+  attributes_contains:
+    gen_ai.tool.call.arguments: ./foo.sh --help
+    gen_ai.tool.call.result: Command contains invalid pattern
+  is_error: true
+- name_pattern: chat
+  operation: chat
+  parent: invoke_agent
+  attributes:
+    gen_ai.operation.name: chat
+    gen_ai.request.model: qwen-plus
+  attributes_contains:
+    gen_ai.input.messages:
+    - ./foo.sh --help
+    - tool_call
+    - tool_call_response
+    gen_ai.output.messages:
+    - doesn't exist
+    - tool_call
+    gen_ai.response.finish_reasons:
+    - stop
+- name_pattern: execute_tool
+  operation: execute_tool
+  parent: invoke_agent
+  attributes:
+    gen_ai.operation.name: execute_tool
+    gen_ai.tool.name: Bash
+    gen_ai.tool.call.id: call_2066d7694fd9488096cd3e
+  attributes_contains:
+    gen_ai.tool.call.arguments: ls -la
+    gen_ai.tool.call.result: total
+  is_error: false
+- name_pattern: chat
+  operation: chat
+  parent: invoke_agent
+  attributes:
+    gen_ai.operation.name: chat
+    gen_ai.request.model: qwen-plus
+  attributes_contains:
+    gen_ai.input.messages:
+    - ./foo.sh --help
+    - tool_call
+    - tool_call_response
+    - ls -la
+    gen_ai.output.messages:
+    - no `foo.sh` script
+    gen_ai.response.finish_reasons:
+    - stop
+  message_structure:
+    gen_ai.input.messages:
+      roles:
+      - user
+      - assistant
+      - tool
+      - assistant
+      - tool
+      parts_types:
+      - - text
+      - - text
+        - tool_call
+      - - tool_call_response
+      - - text
+        - tool_call
+      - - tool_call_response
diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/cassettes/test_pretooluse_hook.yaml b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/cassettes/test_pretooluse_hook.yaml
new file mode 100644
index 000000000..3889d387d
--- /dev/null
+++ b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/cassettes/test_pretooluse_hook.yaml
@@ -0,0 +1,252 @@
+description: 'User query: Run the bash command: ./foo.sh --help (PreToolUse hook 阻止的命令)'
+prompt: 'Run the bash command: ./foo.sh --help'
+messages:
+- type: SystemMessage
+  subtype: init
+  data:
+    type: system
+    subtype: init
+    cwd: /Users/sipercai/project/pyins/extend/claude-agent-sdk-python/examples
+    session_id: cf630075-3251-4f84-a57d-4274afed0c46
+    tools:
+    - Task
+    - TaskOutput
+    - Bash
+    - Glob
+    - Grep
+    - ExitPlanMode
+    - Read
+    - Edit
+    - Write
+    - NotebookEdit
+    - WebFetch
+    - TodoWrite
+    - WebSearch
+    - KillShell
+    - AskUserQuestion
+    - Skill
+    - EnterPlanMode
+    mcp_servers: []
+    model: qwen-plus
+    permissionMode: default
+    slash_commands:
+    - compact
+    - context
+    - cost
+    - init
+    - pr-comments
+    - release-notes
+    - review
+    - security-review
+    apiKeySource: ANTHROPIC_API_KEY
+    claude_code_version: 2.1.1
+    output_style: default
+    agents:
+    - Bash
+    - general-purpose
+    - statusline-setup
+    - Explore
+    - Plan
+    skills: []
+    plugins: []
+    uuid: cf949b1a-3a2e-4135-bcd8-5ae6a9c6ae1d
+- type: AssistantMessage
+  model: qwen-plus
+  content:
+  - type: TextBlock
+    text: 'I''ll run the bash command `./foo.sh --help` for you.
+
+
+      '
+  parent_tool_use_id: null
+  error: null
+- type: AssistantMessage
+  model: qwen-plus
+  content:
+  - type: ToolUseBlock
+    id: call_33776c3031c148f6a02f71
+    name: Bash
+    input:
+      command: ./foo.sh --help
+      description: Run foo.sh script with help flag
+  parent_tool_use_id: null
+  error: null
+- type: UserMessage
+  content:
+  - type: ToolResultBlock
+    tool_use_id: call_33776c3031c148f6a02f71
+    content: 'Command contains invalid pattern: foo.sh'
+    is_error: true
+  uuid: 8173b335-1fa8-45d0-a65d-5d901d69a8c8
+  parent_tool_use_id: null
+- type: AssistantMessage
+  model: qwen-plus
+  content:
+  - type: TextBlock
+    text: 'It looks like the `foo.sh` script doesn''t exist in the current directory or isn''t accessible. Let me check what
+      files are available in the current directory first to see what scripts we have:
+
+
+      '
+  parent_tool_use_id: null
+  error: null
+- type: AssistantMessage
+  model: qwen-plus
+  content:
+  - type: ToolUseBlock
+    id: call_2066d7694fd9488096cd3e
+    name: Bash
+    input:
+      command: ls -la
+      description: List all files in current directory
+  parent_tool_use_id: null
+  error: null
+- type: UserMessage
+  content:
+  - type: ToolResultBlock
+    tool_use_id: call_2066d7694fd9488096cd3e
+    content: 'total 432
+
+      drwxr-xr-x@ 41 sipercai  staff   1312 Jan 23 12:33 .
+
+      drwxr-xr-x@ 23 sipercai  staff    736 Jan 12 18:02 ..
+
+      drwxr-xr-x@ 14 sipercai  staff    448 Jan  9 16:12 __pycache__
+
+      -rw-r--r--@  1 sipercai  staff   4708 Dec 25 17:18 agents.py
+
+      -rw-r--r--@  1 sipercai  staff   2189 Jan 23 12:34 code_reviewer_example.py
+
+      ...'
+    is_error: false
+  uuid: 9d0f6d9c-2715-47b1-9a5c-86237cd935ab
+  parent_tool_use_id: null
+- type: AssistantMessage
+  model: qwen-plus
+  content:
+  - type: TextBlock
+    text: "I can see that there's no `foo.sh` script in the current directory. Looking at the files listed, I can see several\
+      \ Python scripts (`.py` files) and one text file (`hello.txt`). \n\nThe closest executable file I can see is `pretooluse_example.py`\
+      \ which has execute permissions (`-rwxr-xr-x`), but it's a Python script, not a shell script.\n\nWould you like me to:\n\
+      - Check if there are any other shell scripts (`.sh` files) in subdirectories?\n- Run the `--help` command on one of\
+      \ the Python scripts instead (like `python pretooluse_example.py --help`)?\n- Or did you mean to run a different command\
+      \ or script?\n\nLet me know what you'd like to do next!"
+  parent_tool_use_id: null
+  error: null
+- type: ResultMessage
+  subtype: success
+  duration_ms: 11929
+  duration_api_ms: 11297
+  is_error: false
+  num_turns: 3
+  session_id: cf630075-3251-4f84-a57d-4274afed0c46
+  total_cost_usd: 0.129201
+  usage:
+    input_tokens: 41647
+    cache_creation_input_tokens: 0
+    cache_read_input_tokens: 0
+    output_tokens: 284
+    server_tool_use:
+      web_search_requests: 0
+      web_fetch_requests: 0
+    service_tier: standard
+    cache_creation:
+      ephemeral_1h_input_tokens: 0
+      ephemeral_5m_input_tokens: 0
+  result: I can see that there's no `foo.sh` script in the current directory...
+  structured_output: null
+expected_spans:
+- name_pattern: invoke_agent
+  operation: invoke_agent
+  parent: null
+  attributes:
+    gen_ai.operation.name: invoke_agent
+    gen_ai.agent.name: claude-agent
+    gen_ai.conversation.id: cf630075-3251-4f84-a57d-4274afed0c46
+    gen_ai.usage.input_tokens: 41647
+    gen_ai.usage.output_tokens: 284
+  attributes_contains:
+    gen_ai.input.messages: 'Run the bash command: ./foo.sh --help'
+- name_pattern: chat
+  operation: chat
+  parent: invoke_agent
+  attributes:
+    gen_ai.operation.name: chat
+    gen_ai.request.model: qwen-plus
+  attributes_contains:
+    gen_ai.input.messages: 'Run the bash command: ./foo.sh --help'
+    gen_ai.output.messages:
+    - I'll run the bash command
+    - tool_call
+    gen_ai.response.finish_reasons:
+    - stop
+- name_pattern: execute_tool
+  operation: execute_tool
+  parent: invoke_agent
+  attributes:
+    gen_ai.operation.name: execute_tool
+    gen_ai.tool.name: Bash
+    gen_ai.tool.call.id: call_33776c3031c148f6a02f71
+    gen_ai.tool.call.result: 'Command contains invalid pattern: foo.sh'
+  attributes_contains:
+    gen_ai.tool.call.arguments: ./foo.sh --help
+  is_error: true
+- name_pattern: chat
+  operation: chat
+  parent: invoke_agent
+  attributes:
+    gen_ai.operation.name: chat
+    gen_ai.request.model: qwen-plus
+  attributes_contains:
+    gen_ai.input.messages:
+    - 'Run the bash command: ./foo.sh --help'
+    - tool_call
+    - tool_call_response
+    gen_ai.output.messages:
+    - doesn't exist
+    - tool_call
+    gen_ai.response.finish_reasons:
+    - stop
+- name_pattern: execute_tool
+  operation: execute_tool
+  parent: invoke_agent
+  attributes:
+    gen_ai.operation.name: execute_tool
+    gen_ai.tool.name: Bash
+    gen_ai.tool.call.id: call_2066d7694fd9488096cd3e
+  attributes_contains:
+    gen_ai.tool.call.arguments: ls -la
+    gen_ai.tool.call.result: total
+  is_error: false
+- name_pattern: chat
+  operation: chat
+  parent: invoke_agent
+  attributes:
+    gen_ai.operation.name: chat
+    gen_ai.request.model: qwen-plus
+  attributes_contains:
+    gen_ai.input.messages:
+    - 'Run the bash command: ./foo.sh --help'
+    - tool_call
+    - tool_call_response
+    - ls -la
+    gen_ai.output.messages:
+    - no `foo.sh` script
+    gen_ai.response.finish_reasons:
+    - stop
+  message_structure:
+    gen_ai.input.messages:
+      roles:
+      - user
+      - assistant
+      - tool
+      - assistant
+      - tool
+      parts_types:
+      - - text
+      - - text
+        - tool_call
+      - - tool_call_response
+      - - text
+        - tool_call
+      - - tool_call_response
diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/conftest_cassettes.py b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/conftest_cassettes.py
new file mode 100644
index 000000000..0a92dff58
--- /dev/null
+++ b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/conftest_cassettes.py
@@ -0,0 +1,39 @@
+"""Pytest configuration for Claude Agent SDK instrumentation tests."""
+
+import yaml
+from pathlib import Path
+from typing import Any, Dict, List
+import pytest
+
+
+def load_cassette(filename: str) -> Dict[str, Any]:
+    """从 cassettes 目录加载测试用例。
+    
+    Args:
+        filename: cassette 文件名
+        
+    Returns:
+        测试用例数据字典
+    """
+    cassette_path = Path(__file__).parent / "cassettes" / filename
+    
+    with open(cassette_path, 'r', encoding='utf-8') as f:
+        return yaml.safe_load(f)
+
+
+def get_all_cassettes() -> List[str]:
+    """获取所有 cassette 文件名。
+    
+    Returns:
+        cassette 文件名列表
+    """
+    cassettes_dir = Path(__file__).parent / "cassettes"
+    return sorted([f.name for f in cassettes_dir.glob("*.yaml")])
+
+
+# Pytest fixture for cassettes
+@pytest.fixture
+def cassette(request):
+    """加载指定的 cassette 文件。"""
+    filename = request.param
+    return load_cassette(filename)
diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/extract_cassettes.py b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/extract_cassettes.py
new file mode 100644
index 000000000..62d757a1f
--- /dev/null
+++ b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/extract_cassettes.py
@@ -0,0 +1,72 @@
+"""Extract test cases from test_message_flow_cases.py and save as YAML cassettes."""
+
+import json
+import yaml
+from pathlib import Path
+
+# 导入测试用例
+import sys
+sys.path.insert(0, str(Path(__file__).parent))
+from test_message_flow_cases import (
+    TEST_CASE_1_FOO_SH_COMMAND,
+    TEST_CASE_2_ECHO_COMMAND,
+    TEST_CASE_3_PRETOOLUSE,
+)
+
+
+def save_test_case_as_cassette(test_case, filename):
+    """保存测试用例为 YAML cassette 文件。"""
+    cassette_data = {
+        "description": test_case["description"],
+        "prompt": test_case["prompt"],
+        "messages": test_case["messages"],
+    }
+    
+    # 如果有 expected_spans，也保存
+    if "expected_spans" in test_case:
+        cassette_data["expected_spans"] = test_case["expected_spans"]
+    
+    cassettes_dir = Path(__file__).parent / "cassettes"
+    cassettes_dir.mkdir(exist_ok=True)
+    
+    output_file = cassettes_dir / filename
+    
+    with open(output_file, 'w', encoding='utf-8') as f:
+        yaml.dump(
+            cassette_data,
+            f,
+            allow_unicode=True,
+            default_flow_style=False,
+            sort_keys=False,
+            width=120,
+        )
+    
+    print(f"✅ Saved: {output_file}")
+    return output_file
+
+
+def main():
+    """Extract and save all test cases."""
+    print("Extracting test cases to cassettes...\n")
+    
+    # 保存三个测试用例
+    save_test_case_as_cassette(
+        TEST_CASE_1_FOO_SH_COMMAND,
+        "test_foo_sh_command.yaml"
+    )
+    
+    save_test_case_as_cassette(
+        TEST_CASE_2_ECHO_COMMAND,
+        "test_echo_command.yaml"
+    )
+    
+    save_test_case_as_cassette(
+        TEST_CASE_3_PRETOOLUSE,
+        "test_pretooluse_hook.yaml"
+    )
+    
+    print("\n✅ All test cases extracted successfully!")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/test_attributes.py b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/test_attributes.py
index af1d5e1bc..8725e3cd0 100644
--- a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/test_attributes.py
+++ b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/test_attributes.py
@@ -318,5 +318,4 @@ def test_hooks_are_exported():
 def test_utils_are_internal():
     """Test that utils are properly organized."""
     # Utils should have the helper functions
-    assert hasattr(utils, "truncate_value")
     assert hasattr(utils, "extract_usage_metadata")
diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/test_edge_cases.py b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/test_edge_cases.py
index 8f5604273..cb6735d1b 100644
--- a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/test_edge_cases.py
+++ b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/test_edge_cases.py
@@ -105,164 +105,6 @@ def test_instrumentor_multiple_instrument_uninstrument_cycles():
         instrumentor.uninstrument()
 
 
-def test_hook_with_none_tool_use_id(instrument):
-    """Test hook behavior with None tool_use_id."""
-    import asyncio  # noqa: PLC0415
-
-    from opentelemetry.instrumentation.claude_agent_sdk.hooks import (  # noqa: PLC0415
-        post_tool_use_hook,
-        pre_tool_use_hook,
-    )
-
-    # Pre hook with None ID
-    result = asyncio.run(
-        pre_tool_use_hook(
-            {"tool_name": "test", "tool_input": {}},
-            None,  # tool_use_id is None
-            {},
-        )
-    )
-    assert result == {}
-
-    # Post hook with None ID
-    result = asyncio.run(
-        post_tool_use_hook(
-            {"tool_name": "test", "tool_response": "ok"},
-            None,  # tool_use_id is None
-            {},
-        )
-    )
-    assert result == {}
-
-
-def test_hook_with_empty_input_data(instrument):
-    """Test hook behavior with empty input data."""
-    import asyncio  # noqa: PLC0415
-
-    from opentelemetry.instrumentation.claude_agent_sdk.hooks import (  # noqa: PLC0415
-        post_tool_use_hook,
-        pre_tool_use_hook,
-    )
-
-    # Pre hook with empty data
-    result = asyncio.run(
-        pre_tool_use_hook(
-            {},  # empty input
-            "test-id",
-            {},
-        )
-    )
-    assert result == {}
-
-    # Post hook with empty data
-    result = asyncio.run(
-        post_tool_use_hook(
-            {},  # empty input
-            "test-id",
-            {},
-        )
-    )
-    assert result == {}
-
-
-def test_context_clear_when_not_set():
-    """Test clearing context when nothing is set."""
-    from opentelemetry.instrumentation.claude_agent_sdk.context import (  # noqa: PLC0415
-        clear_parent_invocation,
-        get_parent_invocation,
-    )
-
-    # Clear when empty
-    clear_parent_invocation()
-
-    # Should return None
-    assert get_parent_invocation() is None
-
-    # Clear again (should not raise)
-    clear_parent_invocation()
-
-
-def test_context_set_with_none():
-    """Test setting context with None value."""
-    from opentelemetry.instrumentation.claude_agent_sdk.context import (  # noqa: PLC0415
-        clear_parent_invocation,
-        get_parent_invocation,
-        set_parent_invocation,
-    )
-
-    # Set to None
-    set_parent_invocation(None)
-
-    # Should retrieve None
-    assert get_parent_invocation() is None
-
-    clear_parent_invocation()
-
-
-def test_clear_active_tool_runs_with_empty_runs():
-    """Test clearing tool runs when there are none."""
-    from opentelemetry.instrumentation.claude_agent_sdk.hooks import (  # noqa: PLC0415
-        clear_active_tool_runs,
-    )
-
-    # Should not raise even if no active runs
-    clear_active_tool_runs()
-
-
-def test_clear_active_tool_runs_multiple_times():
-    """Test clearing tool runs multiple times."""
-    from opentelemetry.instrumentation.claude_agent_sdk.hooks import (  # noqa: PLC0415
-        clear_active_tool_runs,
-    )
-
-    # Multiple clears should be safe
-    for _ in range(3):
-        clear_active_tool_runs()
-
-
-def test_utils_truncate_with_zero_max_length():
-    """Test truncate with zero max length."""
-    from opentelemetry.instrumentation.claude_agent_sdk.utils import (  # noqa: PLC0415
-        truncate_value,
-    )
-
-    result = truncate_value("hello", max_length=0)
-    # Should handle gracefully
-    assert isinstance(result, str)
-
-
-def test_utils_truncate_with_negative_max_length():
-    """Test truncate with negative max length."""
-    from opentelemetry.instrumentation.claude_agent_sdk.utils import (  # noqa: PLC0415
-        truncate_value,
-    )
-
-    result = truncate_value("hello", max_length=-1)
-    # Should handle gracefully
-    assert isinstance(result, str)
-
-
-def test_utils_truncate_with_circular_reference():
-    """Test truncate handles circular references."""
-    from opentelemetry.instrumentation.claude_agent_sdk.utils import (  # noqa: PLC0415
-        truncate_value,
-    )
-
-    # Create circular reference
-    a = {}
-    b = {"parent": a}
-    a["child"] = b
-
-    # Should not infinite loop - will raise RecursionError which is expected
-    try:
-        result = truncate_value(a, max_length=100)
-        # If it doesn't raise, it should return a string
-        assert isinstance(result, str)
-    except RecursionError:
-        # This is acceptable - circular references are edge cases
-        pass
-
-
 def test_utils_extract_usage_with_non_numeric_strings():
     """Test usage extraction with string values."""
     from opentelemetry.instrumentation.claude_agent_sdk.utils import (  # noqa: PLC0415
diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/test_integration.py b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/test_integration.py
index c4fa4cb97..de047c8f5 100644
--- a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/test_integration.py
+++ b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/test_integration.py
@@ -12,19 +12,9 @@
 from opentelemetry.instrumentation.claude_agent_sdk import (
     ClaudeAgentSDKInstrumentor,
 )
-from opentelemetry.instrumentation.claude_agent_sdk.context import (
-    clear_parent_invocation,
-    get_parent_invocation,
-    set_parent_invocation,
-)
-from opentelemetry.instrumentation.claude_agent_sdk.hooks import (
-    post_tool_use_hook,
-    pre_tool_use_hook,
-)
 from opentelemetry.instrumentation.claude_agent_sdk.utils import (
     extract_usage_metadata,
     sum_anthropic_tokens,
-    truncate_value,
 )
 from opentelemetry.sdk.metrics import MeterProvider
 
@@ -102,50 +92,6 @@ def test_utils_work_with_mock_data(instrument):
     assert summed["input_tokens"] == 100
     assert summed["output_tokens"] == 50
 
-    # Test truncation
-    truncated = truncate_value("test" * 100, max_length=50)
-    assert len(truncated) <= 53  # 50 + "..."
-
-
-def test_context_operations_isolated(instrument):
-    """Test context operations work in isolated test environment."""
-    # Set and retrieve
-    test_value = "test_invocation_123"
-    set_parent_invocation(test_value)
-    assert get_parent_invocation() == test_value
-
-    # Clear
-    clear_parent_invocation()
-    assert get_parent_invocation() is None
-
-
-def test_hooks_can_be_called_directly(instrument):
-    """Test that hooks can be called directly without crashing."""
-    # Call pre hook
-    tool_data = {
-        "tool_name": "TestTool",
-        "tool_input": {"param": "value"},
-    }
-
-    try:
-        result = asyncio.run(pre_tool_use_hook(tool_data, "tool_123", {}))
-        assert isinstance(result, dict)
-    except Exception as e:
-        # Hook might need full context, but shouldn't crash hard
-        print(f"Hook raised: {e}")
-
-    # Call post hook
-    result_data = {
-        "tool_name": "TestTool",
-        "tool_response": "success",
-    }
-
-    try:
-        result = asyncio.run(post_tool_use_hook(result_data, "tool_123", {}))
-        assert isinstance(result, dict)
-    except Exception as e:
-        print(f"Hook raised: {e}")
-
 
 def test_instrumentor_lifecycle_complete(tracer_provider):
     """Test complete instrumentor lifecycle."""
diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/test_message_flow_cases.py b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/test_message_flow_cases.py
new file mode 100644
index 000000000..85adb8162
--- /dev/null
+++ b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/test_message_flow_cases.py
@@ -0,0 +1,1939 @@
+"""测试用例：基于真实的 message 流来测试 _process_agent_invocation_stream 函数。
+
+这些测试用例基于 claude-agent-sdk-python 的真实运行示例：
+1. foo.sh 命令示例 - 展示了工具调用和错误处理（多轮对话，包含错误场景）
+2. echo 命令示例 - 展示了简单的成功路径（单轮对话，成功场景）
+3. pretooluse 示例 - 展示了 PreToolUse hook 的使用（工具调用被 hook 阻止）
+
+测试数据来源：/Users/sipercai/.cursor/projects/Users-sipercai-project-pyins/terminals/6.txt:229-246
+"""
+
+import json
+from typing import Any, AsyncIterator, Dict, List
+from unittest.mock import MagicMock
+
+import pytest
+
+
+# ============================================================================
+# 辅助函数：从测试数据创建模拟的 message 对象
+# ============================================================================
+
+
+def create_mock_message_from_data(message_data: Dict[str, Any]) -> Any:
+    """从测试数据字典创建模拟的 message 对象。
+    
+    Args:
+        message_data: 包含消息数据的字典
+        
+    Returns:
+        模拟的 message 对象，具有与真实对象相同的属性
+    """
+    mock_msg = MagicMock()
+    msg_type = message_data["type"]
+    
+    # 设置类型名称，让 isinstance 检查能够通过字符串比较
+    mock_msg.__class__.__name__ = msg_type
+    
+    if msg_type == "SystemMessage":
+        mock_msg.subtype = message_data["subtype"]
+        mock_msg.data = message_data["data"]
+        
+    elif msg_type == "AssistantMessage":
+        mock_msg.model = message_data["model"]
+        mock_msg.content = []
+        
+        for block_data in message_data["content"]:
+            mock_block = MagicMock()
+            block_type = block_data["type"]
+            mock_block.__class__.__name__ = block_type
+            
+            if block_type == "TextBlock":
+                mock_block.text = block_data["text"]
+            elif block_type == "ToolUseBlock":
+                mock_block.id = block_data["id"]
+                mock_block.name = block_data["name"]
+                mock_block.input = block_data["input"]
+            
+            mock_msg.content.append(mock_block)
+            
+        mock_msg.parent_tool_use_id = message_data.get("parent_tool_use_id")
+        mock_msg.error = message_data.get("error")
+        
+    elif msg_type == "UserMessage":
+        mock_msg.content = []
+        
+        for block_data in message_data["content"]:
+            mock_block = MagicMock()
+            mock_block.__class__.__name__ = block_data["type"]
+            
+            if block_data["type"] == "ToolResultBlock":
+                mock_block.tool_use_id = block_data["tool_use_id"]
+                mock_block.content = block_data["content"]
+                mock_block.is_error = block_data["is_error"]
+                
+            mock_msg.content.append(mock_block)
+            
+        mock_msg.uuid = message_data.get("uuid")
+        mock_msg.parent_tool_use_id = message_data.get("parent_tool_use_id")
+        
+    elif msg_type == "ResultMessage":
+        mock_msg.subtype = message_data["subtype"]
+        mock_msg.duration_ms = message_data["duration_ms"]
+        mock_msg.duration_api_ms = message_data.get("duration_api_ms")
+        mock_msg.is_error = message_data["is_error"]
+        mock_msg.num_turns = message_data["num_turns"]
+        mock_msg.session_id = message_data.get("session_id")
+        mock_msg.total_cost_usd = message_data["total_cost_usd"]
+        mock_msg.usage = message_data["usage"]
+        mock_msg.result = message_data["result"]
+        mock_msg.structured_output = message_data.get("structured_output")
+    
+    return mock_msg
+
+
+async def create_mock_stream_from_messages(
+    messages: List[Dict[str, Any]]
+) -> AsyncIterator[Any]:
+    """创建模拟的 message 异步流。
+    
+    Args:
+        messages: 消息数据列表
+        
+    Yields:
+        模拟的 message 对象
+    """
+    for message_data in messages:
+        yield create_mock_message_from_data(message_data)
+
+
+# ============================================================================
+# 真实测试数据
+# ============================================================================
+
+TEST_CASE_1_FOO_SH_COMMAND = {
+    "description": "User query: ./foo.sh --help (被阻止的命令)",
+    "prompt": "./foo.sh --help",
+    "messages": [
+        {
+            "type": "SystemMessage",
+            "subtype": "init",
+            "data": {
+                "type": "system",
+                "subtype": "init",
+                "cwd": "/Users/sipercai/project/pyins/extend/claude-agent-sdk-python/examples",
+                "session_id": "cf630075-3251-4f84-a57d-4274afed0c46",
+                "tools": [
+                    "Task",
+                    "TaskOutput",
+                    "Bash",
+                    "Glob",
+                    "Grep",
+                    "ExitPlanMode",
+                    "Read",
+                    "Edit",
+                    "Write",
+                    "NotebookEdit",
+                    "WebFetch",
+                    "TodoWrite",
+                    "WebSearch",
+                    "KillShell",
+                    "AskUserQuestion",
+                    "Skill",
+                    "EnterPlanMode",
+                ],
+                "mcp_servers": [],
+                "model": "qwen-plus",
+                "permissionMode": "default",
+                "slash_commands": [
+                    "compact",
+                    "context",
+                    "cost",
+                    "init",
+                    "pr-comments",
+                    "release-notes",
+                    "review",
+                    "security-review",
+                ],
+                "apiKeySource": "ANTHROPIC_API_KEY",
+                "claude_code_version": "2.1.1",
+                "output_style": "default",
+                "agents": [
+                    "Bash",
+                    "general-purpose",
+                    "statusline-setup",
+                    "Explore",
+                    "Plan",
+                ],
+                "skills": [],
+                "plugins": [],
+                "uuid": "cf949b1a-3a2e-4135-bcd8-5ae6a9c6ae1d",
+            },
+        },
+        {
+            "type": "AssistantMessage",
+            "model": "qwen-plus",
+            "content": [
+                {
+                    "type": "TextBlock",
+                    "text": "I'll run the bash command `./foo.sh --help` for you.\n\n",
+                }
+            ],
+            "parent_tool_use_id": None,
+            "error": None,
+        },
+        {
+            "type": "AssistantMessage",
+            "model": "qwen-plus",
+            "content": [
+                {
+                    "type": "ToolUseBlock",
+                    "id": "call_33776c3031c148f6a02f71",
+                    "name": "Bash",
+                    "input": {
+                        "command": "./foo.sh --help",
+                        "description": "Run foo.sh script with help flag",
+                    },
+                }
+            ],
+            "parent_tool_use_id": None,
+            "error": None,
+        },
+        {
+            "type": "UserMessage",
+            "content": [
+                {
+                    "type": "ToolResultBlock",
+                    "tool_use_id": "call_33776c3031c148f6a02f71",
+                    "content": "Command contains invalid pattern: foo.sh",
+                    "is_error": True,
+                }
+            ],
+            "uuid": "8173b335-1fa8-45d0-a65d-5d901d69a8c8",
+            "parent_tool_use_id": None,
+        },
+        {
+            "type": "AssistantMessage",
+            "model": "qwen-plus",
+            "content": [
+                {
+                    "type": "TextBlock",
+                    "text": "It looks like the `foo.sh` script doesn't exist in the current directory or isn't accessible. Let me check what files are available in the current directory first to see what scripts we have:\n\n",
+                }
+            ],
+            "parent_tool_use_id": None,
+            "error": None,
+        },
+        {
+            "type": "AssistantMessage",
+            "model": "qwen-plus",
+            "content": [
+                {
+                    "type": "ToolUseBlock",
+                    "id": "call_2066d7694fd9488096cd3e",
+                    "name": "Bash",
+                    "input": {
+                        "command": "ls -la",
+                        "description": "List all files in current directory",
+                    },
+                }
+            ],
+            "parent_tool_use_id": None,
+            "error": None,
+        },
+        {
+            "type": "UserMessage",
+            "content": [
+                {
+                    "type": "ToolResultBlock",
+                    "tool_use_id": "call_2066d7694fd9488096cd3e",
+                    "content": "total 432\ndrwxr-xr-x@ 41 sipercai  staff   1312 Jan 23 12:33 .\ndrwxr-xr-x@ 23 sipercai  staff    736 Jan 12 18:02 ..\ndrwxr-xr-x@ 14 sipercai  staff    448 Jan  9 16:12 __pycache__\n-rw-r--r--@  1 sipercai  staff   4708 Dec 25 17:18 agents.py\n-rw-r--r--@  1 sipercai  staff   2189 Jan 23 12:34 code_reviewer_example.py\n...",
+                    "is_error": False,
+                }
+            ],
+            "uuid": "9d0f6d9c-2715-47b1-9a5c-86237cd935ab",
+            "parent_tool_use_id": None,
+        },
+        {
+            "type": "AssistantMessage",
+            "model": "qwen-plus",
+            "content": [
+                {
+                    "type": "TextBlock",
+                    "text": "I can see that there's no `foo.sh` script in the current directory. Looking at the files listed, I can see several Python scripts (`.py` files) and one text file (`hello.txt`). \n\nThe closest executable file I can see is `pretooluse_example.py` which has execute permissions (`-rwxr-xr-x`), but it's a Python script, not a shell script.\n\nWould you like me to:\n- Check if there are any other shell scripts (`.sh` files) in subdirectories?\n- Run the `--help` command on one of the Python scripts instead (like `python pretooluse_example.py --help`)?\n- Or did you mean to run a different command or script?\n\nLet me know what you'd like to do next!",
+                }
+            ],
+            "parent_tool_use_id": None,
+            "error": None,
+        },
+        {
+            "type": "ResultMessage",
+            "subtype": "success",
+            "duration_ms": 11929,
+            "duration_api_ms": 11297,
+            "is_error": False,
+            "num_turns": 3,
+            "session_id": "cf630075-3251-4f84-a57d-4274afed0c46",
+            "total_cost_usd": 0.129201,
+            "usage": {
+                "input_tokens": 41647,
+                "cache_creation_input_tokens": 0,
+                "cache_read_input_tokens": 0,
+                "output_tokens": 284,
+                "server_tool_use": {"web_search_requests": 0, "web_fetch_requests": 0},
+                "service_tier": "standard",
+                "cache_creation": {
+                    "ephemeral_1h_input_tokens": 0,
+                    "ephemeral_5m_input_tokens": 0,
+                },
+            },
+            "result": "I can see that there's no `foo.sh` script in the current directory...",
+            "structured_output": None,
+        },
+    ],
+    "expected_spans": [
+        {
+            "name_pattern": "invoke_agent",
+            "operation": "invoke_agent",
+            "parent": None,  # 根 span
+            "attributes": {
+                "gen_ai.operation.name": "invoke_agent",
+                "gen_ai.agent.name": "claude-agent",
+                "gen_ai.conversation.id": "cf630075-3251-4f84-a57d-4274afed0c46",
+                "gen_ai.usage.input_tokens": 41647,
+                "gen_ai.usage.output_tokens": 284,
+            },
+            "attributes_contains": {
+                "gen_ai.input.messages": "./foo.sh --help",  # 包含 prompt
+            },
+        },
+        {
+            "name_pattern": "chat",
+            "operation": "chat",
+            "parent": "invoke_agent",  # Agent span 的子 span
+            "attributes": {
+                "gen_ai.operation.name": "chat",
+                "gen_ai.request.model": "qwen-plus",
+            },
+            "attributes_contains": {
+                "gen_ai.input.messages": "./foo.sh --help",  # 包含 prompt
+                "gen_ai.output.messages": ["I'll run the bash command", "tool_call"],
+                "gen_ai.response.finish_reasons": ["stop"],
+            },
+        },
+        {
+            "name_pattern": "execute_tool",
+            "operation": "execute_tool",
+            "parent": "invoke_agent",  # Agent span 的子 span（不是 LLM span）
+            "attributes": {
+                "gen_ai.operation.name": "execute_tool",
+                "gen_ai.tool.name": "Bash",
+                "gen_ai.tool.call.id": "call_33776c3031c148f6a02f71",
+            },
+            "attributes_contains": {
+                "gen_ai.tool.call.arguments": "./foo.sh --help",  # 包含命令
+                "gen_ai.tool.call.result": "Command contains invalid pattern",  # 工具结果
+            },
+            "is_error": True,  # 第一个工具调用失败
+        },
+        {
+            "name_pattern": "chat",
+            "operation": "chat",
+            "parent": "invoke_agent",  # Agent span 的子 span
+            "attributes": {
+                "gen_ai.operation.name": "chat",
+                "gen_ai.request.model": "qwen-plus",
+            },
+            "attributes_contains": {
+                "gen_ai.input.messages": [
+                    "./foo.sh --help",  # 用户初始请求
+                    "tool_call",  # 第一个 LLM 的 tool_call
+                    "tool_call_response",  # 第一个工具的结果
+                ],
+                "gen_ai.output.messages": [
+                    "doesn't exist",  # 输出文本内容
+                    "tool_call",  # 第二个 tool_call
+                ],
+                "gen_ai.response.finish_reasons": ["stop"],
+            },
+        },
+        {
+            "name_pattern": "execute_tool",
+            "operation": "execute_tool",
+            "parent": "invoke_agent",  # Agent span 的子 span
+            "attributes": {
+                "gen_ai.operation.name": "execute_tool",
+                "gen_ai.tool.name": "Bash",
+                "gen_ai.tool.call.id": "call_2066d7694fd9488096cd3e",
+            },
+            "attributes_contains": {
+                "gen_ai.tool.call.arguments": "ls -la",  # 包含命令
+                "gen_ai.tool.call.result": "total",  # 工具结果（包含 ls 输出）
+            },
+            "is_error": False,  # 第二个工具调用成功
+        },
+        {
+            "name_pattern": "chat",
+            "operation": "chat",
+            "parent": "invoke_agent",  # Agent span 的子 span
+            "attributes": {
+                "gen_ai.operation.name": "chat",
+                "gen_ai.request.model": "qwen-plus",
+            },
+            "attributes_contains": {
+                "gen_ai.input.messages": [
+                    "./foo.sh --help",  # 用户初始请求
+                    "tool_call",  # 历史 tool_call
+                    "tool_call_response",  # 历史工具结果
+                    "ls -la",  # 第二个工具的参数
+                ],
+                "gen_ai.output.messages": [
+                    "no `foo.sh` script",  # 输出文本内容
+                ],
+                "gen_ai.response.finish_reasons": ["stop"],
+            },
+            "message_structure": {
+                "gen_ai.input.messages": {
+                    "roles": ["user", "assistant", "tool", "assistant", "tool"],
+                    "parts_types": [
+                        ["text"],  # user message
+                        ["text", "tool_call"],  # assistant message 1
+                        ["tool_call_response"],  # tool response 1
+                        ["text", "tool_call"],  # assistant message 2
+                        ["tool_call_response"],  # tool response 2
+                    ],
+                }
+            },
+        },
+    ],
+}
+
+TEST_CASE_2_ECHO_COMMAND = {
+    "description": "User query: echo 'Hello from hooks example!'",
+    "prompt": "echo 'Hello from hooks example!'",
+    "messages": [
+        {
+            "type": "SystemMessage",
+            "subtype": "init",
+            "data": {
+                "type": "system",
+                "subtype": "init",
+                "cwd": "/Users/sipercai/project/pyins/extend/claude-agent-sdk-python/examples",
+                "session_id": "cf630075-3251-4f84-a57d-4274afed0c46",
+                "tools": [
+                    "Task",
+                    "TaskOutput",
+                    "Bash",
+                    "Glob",
+                    "Grep",
+                    "ExitPlanMode",
+                    "Read",
+                    "Edit",
+                    "Write",
+                    "NotebookEdit",
+                    "WebFetch",
+                    "TodoWrite",
+                    "WebSearch",
+                    "KillShell",
+                    "AskUserQuestion",
+                    "Skill",
+                    "EnterPlanMode",
+                ],
+                "mcp_servers": [],
+                "model": "qwen-plus",
+                "permissionMode": "default",
+                "slash_commands": [
+                    "compact",
+                    "context",
+                    "cost",
+                    "init",
+                    "pr-comments",
+                    "release-notes",
+                    "review",
+                    "security-review",
+                ],
+                "apiKeySource": "ANTHROPIC_API_KEY",
+                "claude_code_version": "2.1.1",
+                "output_style": "default",
+                "agents": [
+                    "Bash",
+                    "general-purpose",
+                    "statusline-setup",
+                    "Explore",
+                    "Plan",
+                ],
+                "skills": [],
+                "plugins": [],
+                "uuid": "8dd3b79c-8b8c-4649-8e4d-7eaa01374f1f",
+            },
+        },
+        {
+            "type": "AssistantMessage",
+            "model": "qwen-plus",
+            "content": [
+                {
+                    "type": "TextBlock",
+                    "text": "I'll run the bash command `echo 'Hello from hooks example!'` for you.\n\n\n",
+                }
+            ],
+            "parent_tool_use_id": None,
+            "error": None,
+        },
+        {
+            "type": "AssistantMessage",
+            "model": "qwen-plus",
+            "content": [
+                {
+                    "type": "ToolUseBlock",
+                    "id": "call_601533b186c2423dad58e8",
+                    "name": "Bash",
+                    "input": {
+                        "command": "echo 'Hello from hooks example!'",
+                        "description": "Print hello message from hooks example",
+                    },
+                }
+            ],
+            "parent_tool_use_id": None,
+            "error": None,
+        },
+        {
+            "type": "UserMessage",
+            "content": [
+                {
+                    "type": "ToolResultBlock",
+                    "tool_use_id": "call_601533b186c2423dad58e8",
+                    "content": "Hello from hooks example\\!",
+                    "is_error": False,
+                }
+            ],
+            "uuid": "a043c8a7-9b34-46db-adbe-d376748df781",
+            "parent_tool_use_id": None,
+        },
+        {
+            "type": "AssistantMessage",
+            "model": "qwen-plus",
+            "content": [
+                {
+                    "type": "TextBlock",
+                    "text": 'The command executed successfully and printed: "Hello from hooks example!"\n\nNote: The exclamation mark appears to have been escaped in the output as `\\!`, which is normal behavior in some shell contexts, but the message was displayed correctly.\n\nIs there anything else you\'d like me to help you with?',
+                }
+            ],
+            "parent_tool_use_id": None,
+            "error": None,
+        },
+        {
+            "type": "ResultMessage",
+            "subtype": "success",
+            "duration_ms": 5280,
+            "duration_api_ms": 16331,
+            "is_error": False,
+            "num_turns": 2,
+            "session_id": "cf630075-3251-4f84-a57d-4274afed0c46",
+            "total_cost_usd": 0.221802,
+            "usage": {
+                "input_tokens": 30297,
+                "cache_creation_input_tokens": 0,
+                "cache_read_input_tokens": 0,
+                "output_tokens": 114,
+                "server_tool_use": {"web_search_requests": 0, "web_fetch_requests": 0},
+                "service_tier": "standard",
+                "cache_creation": {
+                    "ephemeral_1h_input_tokens": 0,
+                    "ephemeral_5m_input_tokens": 0,
+                },
+            },
+            "result": 'The command executed successfully and printed: "Hello from hooks example!"...',
+            "structured_output": None,
+        },
+    ],
+    "expected_spans": [
+        {
+            "name_pattern": "invoke_agent",
+            "operation": "invoke_agent",
+            "parent": None,  # 根 span
+            "attributes": {
+                "gen_ai.operation.name": "invoke_agent",
+                "gen_ai.agent.name": "claude-agent",
+                "gen_ai.conversation.id": "cf630075-3251-4f84-a57d-4274afed0c46",
+                "gen_ai.usage.input_tokens": 30297,
+                "gen_ai.usage.output_tokens": 114,
+            },
+            "attributes_contains": {
+                "gen_ai.input.messages": "echo 'Hello from hooks example!'",  # 包含 prompt
+            },
+        },
+        {
+            "name_pattern": "chat",
+            "operation": "chat",
+            "parent": "invoke_agent",  # Agent span 的子 span
+            "attributes": {
+                "gen_ai.operation.name": "chat",
+                "gen_ai.request.model": "qwen-plus",
+            },
+            "attributes_contains": {
+                "gen_ai.input.messages": "echo 'Hello from hooks example!'",  # 包含 prompt
+            },
+        },
+        {
+            "name_pattern": "execute_tool",
+            "operation": "execute_tool",
+            "parent": "invoke_agent",  # Agent span 的子 span（不是 LLM span）
+            "attributes": {
+                "gen_ai.operation.name": "execute_tool",
+                "gen_ai.tool.name": "Bash",
+                "gen_ai.tool.call.id": "call_601533b186c2423dad58e8",
+            },
+            "attributes_contains": {
+                "gen_ai.tool.call.arguments": "echo 'Hello from hooks example!'",  # 包含命令
+            },
+            "is_error": False,  # 工具调用成功
+        },
+        {
+            "name_pattern": "chat",
+            "operation": "chat",
+            "parent": "invoke_agent",  # Agent span 的子 span
+            "attributes": {
+                "gen_ai.operation.name": "chat",
+                "gen_ai.request.model": "qwen-plus",
+            },
+            "attributes_contains": {
+                "gen_ai.input.messages": "echo 'Hello from hooks example!'",  # 包含 prompt
+            },
+        },
+    ],
+}
+
+TEST_CASE_3_PRETOOLUSE = {
+    "description": "User query: Run the bash command: ./foo.sh --help (PreToolUse hook 阻止的命令)",
+    "prompt": "Run the bash command: ./foo.sh --help",
+    "messages": [
+        {
+            "type": "SystemMessage",
+            "subtype": "init",
+            "data": {
+                "type": "system",
+                "subtype": "init",
+                "cwd": "/Users/sipercai/project/pyins/extend/claude-agent-sdk-python/examples",
+                "session_id": "cf630075-3251-4f84-a57d-4274afed0c46",
+                "tools": [
+                    "Task",
+                    "TaskOutput",
+                    "Bash",
+                    "Glob",
+                    "Grep",
+                    "ExitPlanMode",
+                    "Read",
+                    "Edit",
+                    "Write",
+                    "NotebookEdit",
+                    "WebFetch",
+                    "TodoWrite",
+                    "WebSearch",
+                    "KillShell",
+                    "AskUserQuestion",
+                    "Skill",
+                    "EnterPlanMode",
+                ],
+                "mcp_servers": [],
+                "model": "qwen-plus",
+                "permissionMode": "default",
+                "slash_commands": [
+                    "compact",
+                    "context",
+                    "cost",
+                    "init",
+                    "pr-comments",
+                    "release-notes",
+                    "review",
+                    "security-review",
+                ],
+                "apiKeySource": "ANTHROPIC_API_KEY",
+                "claude_code_version": "2.1.1",
+                "output_style": "default",
+                "agents": [
+                    "Bash",
+                    "general-purpose",
+                    "statusline-setup",
+                    "Explore",
+                    "Plan",
+                ],
+                "skills": [],
+                "plugins": [],
+                "uuid": "cf949b1a-3a2e-4135-bcd8-5ae6a9c6ae1d",
+            },
+        },
+        {
+            "type": "AssistantMessage",
+            "model": "qwen-plus",
+            "content": [
+                {
+                    "type": "TextBlock",
+                    "text": "I'll run the bash command `./foo.sh --help` for you.\n\n",
+                }
+            ],
+            "parent_tool_use_id": None,
+            "error": None,
+        },
+        {
+            "type": "AssistantMessage",
+            "model": "qwen-plus",
+            "content": [
+                {
+                    "type": "ToolUseBlock",
+                    "id": "call_33776c3031c148f6a02f71",
+                    "name": "Bash",
+                    "input": {
+                        "command": "./foo.sh --help",
+                        "description": "Run foo.sh script with help flag",
+                    },
+                }
+            ],
+            "parent_tool_use_id": None,
+            "error": None,
+        },
+        {
+            "type": "UserMessage",
+            "content": [
+                {
+                    "type": "ToolResultBlock",
+                    "tool_use_id": "call_33776c3031c148f6a02f71",
+                    "content": "Command contains invalid pattern: foo.sh",
+                    "is_error": True,
+                }
+            ],
+            "uuid": "8173b335-1fa8-45d0-a65d-5d901d69a8c8",
+            "parent_tool_use_id": None,
+        },
+        {
+            "type": "AssistantMessage",
+            "model": "qwen-plus",
+            "content": [
+                {
+                    "type": "TextBlock",
+                    "text": "It looks like the `foo.sh` script doesn't exist in the current directory or isn't accessible. Let me check what files are available in the current directory first to see what scripts we have:\n\n",
+                }
+            ],
+            "parent_tool_use_id": None,
+            "error": None,
+        },
+        {
+            "type": "AssistantMessage",
+            "model": "qwen-plus",
+            "content": [
+                {
+                    "type": "ToolUseBlock",
+                    "id": "call_2066d7694fd9488096cd3e",
+                    "name": "Bash",
+                    "input": {
+                        "command": "ls -la",
+                        "description": "List all files in current directory",
+                    },
+                }
+            ],
+            "parent_tool_use_id": None,
+            "error": None,
+        },
+        {
+            "type": "UserMessage",
+            "content": [
+                {
+                    "type": "ToolResultBlock",
+                    "tool_use_id": "call_2066d7694fd9488096cd3e",
+                    "content": "total 432\ndrwxr-xr-x@ 41 sipercai  staff   1312 Jan 23 12:33 .\ndrwxr-xr-x@ 23 sipercai  staff    736 Jan 12 18:02 ..\ndrwxr-xr-x@ 14 sipercai  staff    448 Jan  9 16:12 __pycache__\n-rw-r--r--@  1 sipercai  staff   4708 Dec 25 17:18 agents.py\n-rw-r--r--@  1 sipercai  staff   2189 Jan 23 12:34 code_reviewer_example.py\n...",
+                    "is_error": False,
+                }
+            ],
+            "uuid": "9d0f6d9c-2715-47b1-9a5c-86237cd935ab",
+            "parent_tool_use_id": None,
+        },
+        {
+            "type": "AssistantMessage",
+            "model": "qwen-plus",
+            "content": [
+                {
+                    "type": "TextBlock",
+                    "text": "I can see that there's no `foo.sh` script in the current directory. Looking at the files listed, I can see several Python scripts (`.py` files) and one text file (`hello.txt`). \n\nThe closest executable file I can see is `pretooluse_example.py` which has execute permissions (`-rwxr-xr-x`), but it's a Python script, not a shell script.\n\nWould you like me to:\n- Check if there are any other shell scripts (`.sh` files) in subdirectories?\n- Run the `--help` command on one of the Python scripts instead (like `python pretooluse_example.py --help`)?\n- Or did you mean to run a different command or script?\n\nLet me know what you'd like to do next!",
+                }
+            ],
+            "parent_tool_use_id": None,
+            "error": None,
+        },
+        {
+            "type": "ResultMessage",
+            "subtype": "success",
+            "duration_ms": 11929,
+            "duration_api_ms": 11297,
+            "is_error": False,
+            "num_turns": 3,
+            "session_id": "cf630075-3251-4f84-a57d-4274afed0c46",
+            "total_cost_usd": 0.129201,
+            "usage": {
+                "input_tokens": 41647,
+                "cache_creation_input_tokens": 0,
+                "cache_read_input_tokens": 0,
+                "output_tokens": 284,
+                "server_tool_use": {"web_search_requests": 0, "web_fetch_requests": 0},
+                "service_tier": "standard",
+                "cache_creation": {
+                    "ephemeral_1h_input_tokens": 0,
+                    "ephemeral_5m_input_tokens": 0,
+                },
+            },
+            "result": "I can see that there's no `foo.sh` script in the current directory...",
+            "structured_output": None,
+        },
+    ],
+    "expected_spans": [
+        {
+            "name_pattern": "invoke_agent",
+            "operation": "invoke_agent",
+            "parent": None,  # 根 span
+            "attributes": {
+                "gen_ai.operation.name": "invoke_agent",
+                "gen_ai.agent.name": "claude-agent",
+                "gen_ai.conversation.id": "cf630075-3251-4f84-a57d-4274afed0c46",
+                "gen_ai.usage.input_tokens": 41647,
+                "gen_ai.usage.output_tokens": 284,
+            },
+            "attributes_contains": {
+                "gen_ai.input.messages": "Run the bash command: ./foo.sh --help",  # 包含 prompt
+            },
+        },
+        {
+            "name_pattern": "chat",
+            "operation": "chat",
+            "parent": "invoke_agent",  # Agent span 的子 span
+            "attributes": {
+                "gen_ai.operation.name": "chat",
+                "gen_ai.request.model": "qwen-plus",
+            },
+            "attributes_contains": {
+                "gen_ai.input.messages": "Run the bash command: ./foo.sh --help",  # 包含 prompt
+                "gen_ai.output.messages": ["I'll run the bash command", "tool_call"],
+                "gen_ai.response.finish_reasons": ["stop"],
+            },
+        },
+        {
+            "name_pattern": "execute_tool",
+            "operation": "execute_tool",
+            "parent": "invoke_agent",  # Agent span 的子 span（不是 LLM span）
+            "attributes": {
+                "gen_ai.operation.name": "execute_tool",
+                "gen_ai.tool.name": "Bash",
+                "gen_ai.tool.call.id": "call_33776c3031c148f6a02f71",
+                "gen_ai.tool.call.result": "Command contains invalid pattern: foo.sh",
+            },
+            "attributes_contains": {
+                "gen_ai.tool.call.arguments": "./foo.sh --help",  # 包含命令
+            },
+            "is_error": True,  # 第一个工具调用被 PreToolUse hook 阻止
+        },
+        {
+            "name_pattern": "chat",
+            "operation": "chat",
+            "parent": "invoke_agent",  # Agent span 的子 span
+            "attributes": {
+                "gen_ai.operation.name": "chat",
+                "gen_ai.request.model": "qwen-plus",
+            },
+            "attributes_contains": {
+                "gen_ai.input.messages": [
+                    "Run the bash command: ./foo.sh --help",  # 用户初始请求
+                    "tool_call",  # 第一个 LLM 的 tool_call
+                    "tool_call_response",  # 第一个工具的结果
+                ],
+                "gen_ai.output.messages": [
+                    "doesn't exist",  # 输出文本内容
+                    "tool_call",  # 第二个 tool_call
+                ],
+                "gen_ai.response.finish_reasons": ["stop"],
+            },
+        },
+        {
+            "name_pattern": "execute_tool",
+            "operation": "execute_tool",
+            "parent": "invoke_agent",  # Agent span 的子 span
+            "attributes": {
+                "gen_ai.operation.name": "execute_tool",
+                "gen_ai.tool.name": "Bash",
+                "gen_ai.tool.call.id": "call_2066d7694fd9488096cd3e",
+            },
+            "attributes_contains": {
+                "gen_ai.tool.call.arguments": "ls -la",  # 包含命令
+                "gen_ai.tool.call.result": "total",  # 工具结果（包含 ls 输出）
+            },
+            "is_error": False,  # 第二个工具调用成功
+        },
+        {
+            "name_pattern": "chat",
+            "operation": "chat",
+            "parent": "invoke_agent",  # Agent span 的子 span
+            "attributes": {
+                "gen_ai.operation.name": "chat",
+                "gen_ai.request.model": "qwen-plus",
+            },
+            "attributes_contains": {
+                "gen_ai.input.messages": [
+                    "Run the bash command: ./foo.sh --help",  # 用户初始请求
+                    "tool_call",  # 历史 tool_call
+                    "tool_call_response",  # 历史工具结果
+                    "ls -la",  # 第二个工具的参数
+                ],
+                "gen_ai.output.messages": [
+                    "no `foo.sh` script",  # 输出文本内容
+                ],
+                "gen_ai.response.finish_reasons": ["stop"],
+            },
+            "message_structure": {
+                "gen_ai.input.messages": {
+                    "roles": ["user", "assistant", "tool", "assistant", "tool"],
+                    "parts_types": [
+                        ["text"],  # user message
+                        ["text", "tool_call"],  # assistant message 1
+                        ["tool_call_response"],  # tool response 1
+                        ["text", "tool_call"],  # assistant message 2
+                        ["tool_call_response"],  # tool response 2
+                    ],
+                }
+            },
+        },
+    ],
+}
+
+
+# ============================================================================
+# 实际测试函数
+# ============================================================================
+
+
+@pytest.mark.asyncio
+@pytest.mark.parametrize(
+    "test_case",
+    [
+        pytest.param(TEST_CASE_1_FOO_SH_COMMAND, id="foo_sh_command"),
+        pytest.param(TEST_CASE_2_ECHO_COMMAND, id="echo_command"),
+        pytest.param(TEST_CASE_3_PRETOOLUSE, id="pretooluse_hook"),
+    ],
+)
+async def test_process_agent_invocation_stream_with_real_messages(
+    instrument, span_exporter, tracer_provider, test_case
+):
+    """使用真实的 message 数据测试 _process_agent_invocation_stream 函数。
+    
+    这个测试验证：
+    1. 函数能够正确处理所有类型的 message（SystemMessage, AssistantMessage, UserMessage, ResultMessage）
+    2. 消息能够正确 yield 出来
+    3. 不会抛出异常
+    4. 消息数量和顺序正确
+    
+    注意：这个测试只验证消息流处理逻辑，不验证 span 生成（那是上层函数的职责）
+    """
+    # 导入需要测试的函数
+    from opentelemetry.instrumentation.claude_agent_sdk.patch import (  # noqa: PLC0415
+        _process_agent_invocation_stream,
+    )
+    from opentelemetry.util.genai.extended_handler import (  # noqa: PLC0415
+        ExtendedTelemetryHandler,
+    )
+
+    # 创建 handler（使用 tracer_provider）
+    handler = ExtendedTelemetryHandler(tracer_provider=tracer_provider)
+
+    # 创建模拟的 stream
+    mock_stream = create_mock_stream_from_messages(test_case["messages"])
+
+    # 处理 stream 并收集所有消息
+    received_messages = []
+    async for message in _process_agent_invocation_stream(
+        wrapped_stream=mock_stream,
+        handler=handler,
+        model=test_case["messages"][0]["data"]["model"],  # 从 SystemMessage 获取 model
+        prompt=test_case["prompt"],
+    ):
+        received_messages.append(message)
+
+    # 验证：收到的消息数量应该等于输入消息数量
+    assert len(received_messages) == len(test_case["messages"]), (
+        f"期望收到 {len(test_case['messages'])} 条消息，"
+        f"实际收到 {len(received_messages)} 条"
+    )
+
+    # 验证：每条消息的类型应该正确
+    for i, (received, expected_data) in enumerate(
+        zip(received_messages, test_case["messages"])
+    ):
+        assert received.__class__.__name__ == expected_data["type"], (
+            f"第 {i} 条消息类型不匹配：期望 {expected_data['type']}，"
+            f"实际 {received.__class__.__name__}"
+        )
+
+    # 打印测试结果摘要
+    print(f"\n{'='*60}")
+    print(f"测试用例: {test_case['description']}")
+    print(f"消息总数: {len(received_messages)}")
+    print(f"消息类型:")
+    for i, msg in enumerate(received_messages):
+        print(f"  {i+1}. {msg.__class__.__name__}")
+    print(f"{'='*60}\n")
+
+
+@pytest.mark.asyncio
+async def test_foo_sh_command_detailed_validation(instrument, span_exporter, tracer_provider):
+    """详细验证 foo.sh 命令示例的消息流处理。
+    
+    这个测试针对 TEST_CASE_1_FOO_SH_COMMAND 进行详细验证：
+    - SystemMessage 的 data 字段
+    - AssistantMessage 的 content 块
+    - ToolUseBlock 的 id、name、input
+    - ToolResultBlock 的 is_error 字段
+    - ResultMessage 的 usage 统计
+    """
+    from opentelemetry.instrumentation.claude_agent_sdk.patch import (  # noqa: PLC0415
+        _process_agent_invocation_stream,
+    )
+    from opentelemetry.util.genai.extended_handler import (  # noqa: PLC0415
+        ExtendedTelemetryHandler,
+    )
+
+    handler = ExtendedTelemetryHandler(tracer_provider=tracer_provider)
+
+    mock_stream = create_mock_stream_from_messages(TEST_CASE_1_FOO_SH_COMMAND["messages"])
+    received_messages = []
+    
+    async for message in _process_agent_invocation_stream(
+        wrapped_stream=mock_stream,
+        handler=handler,
+        model="qwen-plus",
+        prompt=TEST_CASE_1_FOO_SH_COMMAND["prompt"],
+    ):
+        received_messages.append(message)
+
+    # 验证 SystemMessage
+    system_msg = received_messages[0]
+    assert system_msg.__class__.__name__ == "SystemMessage"
+    assert system_msg.subtype == "init"
+    assert system_msg.data["model"] == "qwen-plus"
+    assert "Bash" in system_msg.data["tools"]
+
+    # 验证第一个 ToolUseBlock (foo.sh --help)
+    tool_use_msg_1 = received_messages[2]  # 第3条消息
+    assert tool_use_msg_1.__class__.__name__ == "AssistantMessage"
+    assert len(tool_use_msg_1.content) == 1
+    tool_block_1 = tool_use_msg_1.content[0]
+    assert tool_block_1.__class__.__name__ == "ToolUseBlock"
+    assert tool_block_1.name == "Bash"
+    assert tool_block_1.id == "call_33776c3031c148f6a02f71"
+    assert tool_block_1.input["command"] == "./foo.sh --help"
+
+    # 验证第一个 ToolResultBlock (错误结果)
+    tool_result_msg_1 = received_messages[3]  # 第4条消息
+    assert tool_result_msg_1.__class__.__name__ == "UserMessage"
+    result_block_1 = tool_result_msg_1.content[0]
+    assert result_block_1.__class__.__name__ == "ToolResultBlock"
+    assert result_block_1.is_error is True  # 这是关键：错误场景
+    assert "invalid pattern" in result_block_1.content
+
+    # 验证第二个 ToolUseBlock (ls -la)
+    tool_use_msg_2 = received_messages[5]  # 第6条消息
+    tool_block_2 = tool_use_msg_2.content[0]
+    assert tool_block_2.name == "Bash"
+    assert tool_block_2.id == "call_2066d7694fd9488096cd3e"
+    assert tool_block_2.input["command"] == "ls -la"
+
+    # 验证第二个 ToolResultBlock (成功结果)
+    tool_result_msg_2 = received_messages[6]  # 第7条消息
+    result_block_2 = tool_result_msg_2.content[0]
+    assert result_block_2.is_error is False  # 成功
+
+    # 验证 ResultMessage
+    result_msg = received_messages[-1]
+    assert result_msg.__class__.__name__ == "ResultMessage"
+    assert result_msg.num_turns == 3
+    assert result_msg.is_error is False  # 整体成功
+    assert result_msg.usage["input_tokens"] == 41647
+    assert result_msg.usage["output_tokens"] == 284
+    assert result_msg.total_cost_usd == 0.129201
+
+    print(f"\n✅ foo.sh 命令示例详细验证通过")
+    print(f"  - 包含 2 个工具调用 (1个失败 + 1个成功)")
+    print(f"  - 总共 {result_msg.num_turns} 轮对话")
+    print(f"  - Input tokens: {result_msg.usage['input_tokens']}")
+    print(f"  - Output tokens: {result_msg.usage['output_tokens']}")
+    print(f"  - Cost: ${result_msg.total_cost_usd}\n")
+
+
+@pytest.mark.asyncio
+async def test_echo_command_detailed_validation(instrument, span_exporter, tracer_provider):
+    """详细验证 echo 命令示例的消息流处理。
+    
+    这个测试针对 TEST_CASE_2_ECHO_COMMAND 进行详细验证，
+    这是一个简单的成功场景（单工具调用，无错误）。
+    """
+    from opentelemetry.instrumentation.claude_agent_sdk.patch import (  # noqa: PLC0415
+        _process_agent_invocation_stream,
+    )
+    from opentelemetry.util.genai.extended_handler import (  # noqa: PLC0415
+        ExtendedTelemetryHandler,
+    )
+
+    handler = ExtendedTelemetryHandler(tracer_provider=tracer_provider)
+
+    mock_stream = create_mock_stream_from_messages(TEST_CASE_2_ECHO_COMMAND["messages"])
+    received_messages = []
+    
+    async for message in _process_agent_invocation_stream(
+        wrapped_stream=mock_stream,
+        handler=handler,
+        model="qwen-plus",
+        prompt=TEST_CASE_2_ECHO_COMMAND["prompt"],
+    ):
+        received_messages.append(message)
+
+    # 验证消息数量（应该比 foo.sh 示例少，因为只有1个工具调用）
+    assert len(received_messages) == 6
+
+    # 验证 SystemMessage
+    system_msg = received_messages[0]
+    assert system_msg.__class__.__name__ == "SystemMessage"
+
+    # 验证 ToolUseBlock
+    tool_use_msg = received_messages[2]
+    tool_block = tool_use_msg.content[0]
+    assert tool_block.name == "Bash"
+    assert tool_block.id == "call_601533b186c2423dad58e8"
+    assert "echo" in tool_block.input["command"]
+
+    # 验证 ToolResultBlock (应该是成功的)
+    tool_result_msg = received_messages[3]
+    result_block = tool_result_msg.content[0]
+    assert result_block.is_error is False
+    assert "Hello from hooks example" in result_block.content
+
+    # 验证 ResultMessage
+    result_msg = received_messages[-1]
+    assert result_msg.num_turns == 2  # 比 foo.sh 少1轮
+    assert result_msg.is_error is False
+    assert result_msg.total_cost_usd == 0.221802
+
+    print(f"\n✅ echo 命令示例详细验证通过")
+    print(f"  - 包含 1 个工具调用 (成功)")
+    print(f"  - 总共 {result_msg.num_turns} 轮对话")
+    print(f"  - Input tokens: {result_msg.usage['input_tokens']}")
+    print(f"  - Output tokens: {result_msg.usage['output_tokens']}")
+    print(f"  - Cost: ${result_msg.total_cost_usd}\n")
+
+
+@pytest.mark.asyncio
+async def test_message_stream_yields_all_messages_correctly(instrument, tracer_provider):
+    """测试消息流是否按顺序正确 yield 所有消息。
+    
+    这个测试验证 _process_agent_invocation_stream 函数的核心功能：
+    作为一个 async generator，它应该按顺序 yield 所有输入消息。
+    """
+    from opentelemetry.instrumentation.claude_agent_sdk.patch import (  # noqa: PLC0415
+        _process_agent_invocation_stream,
+    )
+    from opentelemetry.util.genai.extended_handler import (  # noqa: PLC0415
+        ExtendedTelemetryHandler,
+    )
+
+    handler = ExtendedTelemetryHandler(tracer_provider=tracer_provider)
+
+    # 测试三个用例
+    for test_case in [TEST_CASE_1_FOO_SH_COMMAND, TEST_CASE_2_ECHO_COMMAND, TEST_CASE_3_PRETOOLUSE]:
+        mock_stream = create_mock_stream_from_messages(test_case["messages"])
+        received_messages = []
+        
+        async for message in _process_agent_invocation_stream(
+            wrapped_stream=mock_stream,
+            handler=handler,
+            model="qwen-plus",
+            prompt=test_case["prompt"],
+        ):
+            received_messages.append(message)
+
+        # 验证消息顺序
+        for i, (received, expected_data) in enumerate(
+            zip(received_messages, test_case["messages"])
+        ):
+            assert received.__class__.__name__ == expected_data["type"], (
+                f"测试用例 '{test_case['description']}': "
+                f"第 {i} 条消息类型不匹配"
+            )
+
+    print("\n✅ 所有消息都按正确顺序 yield")
+
+
+# ============================================================================
+# Span 验证测试：验证 Agent、LLM、Tool spans 的正确性
+# ============================================================================
+
+
+def find_span_by_name(spans, name_pattern: str):
+    """查找包含指定名称模式的 span。"""
+    return [span for span in spans if name_pattern in span.name]
+
+
+def find_agent_span(spans):
+    """查找 Agent span。"""
+    agent_spans = find_span_by_name(spans, "invoke_agent")
+    if not agent_spans:
+        agent_spans = find_span_by_name(spans, "Agent")
+    return agent_spans[0] if agent_spans else None
+
+
+def find_llm_spans(spans):
+    """查找所有 LLM spans。"""
+    llm_spans = find_span_by_name(spans, "chat")
+    if not llm_spans:
+        llm_spans = find_span_by_name(spans, "LLM")
+    return llm_spans
+
+
+def find_tool_spans(spans):
+    """查找所有 Tool spans。"""
+    tool_spans = find_span_by_name(spans, "execute_tool")
+    if not tool_spans:
+        tool_spans = find_span_by_name(spans, "Tool")
+    return tool_spans
+
+
+@pytest.mark.asyncio
+async def test_agent_span_correctness(instrument, span_exporter, tracer_provider):
+    """验证 Agent span 的正确性。
+    
+    验证内容：
+    1. Agent span 存在且是根 span
+    2. Agent span 的名称正确
+    3. Agent span 的属性正确（system, model, operation.name, prompt 等）
+    4. Agent span 包含正确的 token 统计和 cost
+    """
+    from opentelemetry.instrumentation.claude_agent_sdk.patch import (  # noqa: PLC0415
+        _process_agent_invocation_stream,
+    )
+    from opentelemetry.semconv._incubating.attributes import (  # noqa: PLC0415
+        gen_ai_attributes as GenAIAttributes,
+    )
+    from opentelemetry.util.genai.extended_handler import (  # noqa: PLC0415
+        ExtendedTelemetryHandler,
+    )
+
+    handler = ExtendedTelemetryHandler(tracer_provider=tracer_provider)
+    test_case = TEST_CASE_1_FOO_SH_COMMAND
+
+    mock_stream = create_mock_stream_from_messages(test_case["messages"])
+    
+    async for _ in _process_agent_invocation_stream(
+        wrapped_stream=mock_stream,
+        handler=handler,
+        model="qwen-plus",
+        prompt=test_case["prompt"],
+    ):
+        pass
+
+    spans = span_exporter.get_finished_spans()
+    agent_span = find_agent_span(spans)
+
+    # 验证 Agent span 存在
+    assert agent_span is not None, "应该有一个 Agent span"
+
+    # 验证 Agent span 是根 span（没有 parent）
+    assert agent_span.parent is None, "Agent span 应该是根 span，没有 parent"
+
+    # 验证 Agent span 名称
+    assert "invoke_agent" in agent_span.name or "Agent" in agent_span.name, (
+        f"Agent span 名称应该包含 'invoke_agent' 或 'Agent'，实际: {agent_span.name}"
+    )
+
+    attrs = dict(agent_span.attributes or {})
+
+    # 验证基本属性
+    assert GenAIAttributes.GEN_AI_OPERATION_NAME in attrs, "Agent span 应该有 gen_ai.operation.name 属性"
+    assert attrs[GenAIAttributes.GEN_AI_OPERATION_NAME] == "invoke_agent", (
+        f"gen_ai.operation.name 应该是 'invoke_agent'，实际: {attrs.get(GenAIAttributes.GEN_AI_OPERATION_NAME)}"
+    )
+
+    # 验证 agent.name（如果存在）
+    if GenAIAttributes.GEN_AI_AGENT_NAME in attrs:
+        assert attrs[GenAIAttributes.GEN_AI_AGENT_NAME] == "claude-agent", (
+            f"gen_ai.agent.name 应该是 'claude-agent'，实际: {attrs.get(GenAIAttributes.GEN_AI_AGENT_NAME)}"
+        )
+
+        # 验证 input.messages（如果存在）
+        if GenAIAttributes.GEN_AI_INPUT_MESSAGES in attrs:
+            input_messages = attrs[GenAIAttributes.GEN_AI_INPUT_MESSAGES]
+            assert test_case["prompt"] in str(input_messages), (
+                f"gen_ai.input.messages 应该包含用户查询，实际: {input_messages}"
+            )
+
+    # 验证 token 统计（从 ResultMessage）
+    result_msg = test_case["messages"][-1]
+    if result_msg["type"] == "ResultMessage":
+        usage = result_msg["usage"]
+        if GenAIAttributes.GEN_AI_USAGE_INPUT_TOKENS in attrs:
+            assert attrs[GenAIAttributes.GEN_AI_USAGE_INPUT_TOKENS] == usage["input_tokens"], (
+                f"gen_ai.usage.input_tokens 应该是 {usage['input_tokens']}，"
+                f"实际: {attrs.get(GenAIAttributes.GEN_AI_USAGE_INPUT_TOKENS)}"
+            )
+        if GenAIAttributes.GEN_AI_USAGE_OUTPUT_TOKENS in attrs:
+            assert attrs[GenAIAttributes.GEN_AI_USAGE_OUTPUT_TOKENS] == usage["output_tokens"], (
+                f"gen_ai.usage.output_tokens 应该是 {usage['output_tokens']}，"
+                f"实际: {attrs.get(GenAIAttributes.GEN_AI_USAGE_OUTPUT_TOKENS)}"
+            )
+
+    # 验证 conversation_id（如果存在）
+    if "session_id" in result_msg and result_msg.get("session_id"):
+        if GenAIAttributes.GEN_AI_CONVERSATION_ID in attrs:
+            assert attrs[GenAIAttributes.GEN_AI_CONVERSATION_ID] == result_msg["session_id"], (
+                f"gen_ai.conversation.id 应该是 {result_msg['session_id']}，"
+                f"实际: {attrs.get(GenAIAttributes.GEN_AI_CONVERSATION_ID)}"
+            )
+
+    print(f"\n✅ Agent span 验证通过")
+    print(f"  - 名称: {agent_span.name}")
+    print(f"  - Operation: {attrs.get(GenAIAttributes.GEN_AI_OPERATION_NAME)}")
+    if GenAIAttributes.GEN_AI_AGENT_NAME in attrs:
+        print(f"  - Agent name: {attrs.get(GenAIAttributes.GEN_AI_AGENT_NAME)}")
+    if GenAIAttributes.GEN_AI_CONVERSATION_ID in attrs:
+        print(f"  - Conversation ID: {attrs.get(GenAIAttributes.GEN_AI_CONVERSATION_ID)}")
+    if GenAIAttributes.GEN_AI_USAGE_INPUT_TOKENS in attrs:
+        print(f"  - Input tokens: {attrs[GenAIAttributes.GEN_AI_USAGE_INPUT_TOKENS]}")
+    if GenAIAttributes.GEN_AI_USAGE_OUTPUT_TOKENS in attrs:
+        print(f"  - Output tokens: {attrs[GenAIAttributes.GEN_AI_USAGE_OUTPUT_TOKENS]}")
+
+
+@pytest.mark.asyncio
+async def test_llm_span_correctness(instrument, span_exporter, tracer_provider):
+    """验证 LLM span 的正确性。
+    
+    验证内容：
+    1. LLM span 存在
+    2. LLM span 是 Agent span 的子 span
+    3. LLM span 的属性正确（provider, model, operation.name 等）
+    4. LLM span 包含正确的 input_messages 和 output_messages
+    """
+    from opentelemetry.instrumentation.claude_agent_sdk.patch import (  # noqa: PLC0415
+        _process_agent_invocation_stream,
+    )
+    from opentelemetry.semconv._incubating.attributes import (  # noqa: PLC0415
+        gen_ai_attributes as GenAIAttributes,
+    )
+    from opentelemetry.util.genai.extended_handler import (  # noqa: PLC0415
+        ExtendedTelemetryHandler,
+    )
+
+    handler = ExtendedTelemetryHandler(tracer_provider=tracer_provider)
+    test_case = TEST_CASE_1_FOO_SH_COMMAND
+
+    mock_stream = create_mock_stream_from_messages(test_case["messages"])
+    
+    async for _ in _process_agent_invocation_stream(
+        wrapped_stream=mock_stream,
+        handler=handler,
+        model="qwen-plus",
+        prompt=test_case["prompt"],
+    ):
+        pass
+
+    spans = span_exporter.get_finished_spans()
+    agent_span = find_agent_span(spans)
+    llm_spans = find_llm_spans(spans)
+
+    # 验证 LLM span 存在
+    assert len(llm_spans) > 0, f"应该有至少一个 LLM span，实际: {len(llm_spans)}"
+
+    # 验证每个 LLM span
+    for i, llm_span in enumerate(llm_spans):
+        attrs = dict(llm_span.attributes or {})
+
+        # 验证 LLM span 是 Agent span 的子 span
+        assert llm_span.parent is not None, "LLM span 应该有 parent"
+        assert llm_span.parent.span_id == agent_span.context.span_id, (
+            f"LLM span 的 parent 应该是 Agent span，"
+            f"实际 parent span_id: {llm_span.parent.span_id}, "
+            f"Agent span_id: {agent_span.context.span_id}"
+        )
+
+        # 验证基本属性
+        assert GenAIAttributes.GEN_AI_OPERATION_NAME in attrs, "LLM span 应该有 gen_ai.operation.name 属性"
+        assert attrs[GenAIAttributes.GEN_AI_OPERATION_NAME] == "chat", (
+            f"gen_ai.operation.name 应该是 'chat'，实际: {attrs.get(GenAIAttributes.GEN_AI_OPERATION_NAME)}"
+        )
+
+        assert GenAIAttributes.GEN_AI_PROVIDER_NAME in attrs, "LLM span 应该有 gen_ai.provider.name 属性"
+        # Provider 可能是 "anthropic" 或其他值，取决于 infer_provider_from_base_url()
+
+        assert GenAIAttributes.GEN_AI_REQUEST_MODEL in attrs, "LLM span 应该有 gen_ai.request.model 属性"
+        assert attrs[GenAIAttributes.GEN_AI_REQUEST_MODEL] == "qwen-plus", (
+            f"gen_ai.request.model 应该是 'qwen-plus'，实际: {attrs.get(GenAIAttributes.GEN_AI_REQUEST_MODEL)}"
+        )
+
+        # 验证 input_messages（如果存在）
+        if GenAIAttributes.GEN_AI_INPUT_MESSAGES in attrs:
+            input_messages = attrs[GenAIAttributes.GEN_AI_INPUT_MESSAGES]
+            # input_messages 应该包含 prompt
+            assert test_case["prompt"] in str(input_messages), (
+                f"gen_ai.input.messages 应该包含用户查询，实际: {input_messages}"
+            )
+
+            # 验证 output_messages（如果存在）
+            if GenAIAttributes.GEN_AI_OUTPUT_MESSAGES in attrs:
+                output_messages_raw = attrs[GenAIAttributes.GEN_AI_OUTPUT_MESSAGES]
+                assert output_messages_raw is not None, "gen_ai.output.messages 不应该为空"
+                
+                # 解析 output_messages（可能是 JSON 字符串或字典列表）
+                import json
+                if isinstance(output_messages_raw, str):
+                    try:
+                        output_messages = json.loads(output_messages_raw)
+                    except json.JSONDecodeError:
+                        output_messages = output_messages_raw
+                else:
+                    output_messages = output_messages_raw
+                
+                # 检查是否有重复的 tool_call（每个 tool_call.id 应该是唯一的）
+                if isinstance(output_messages, list):
+                    tool_call_ids = []
+                    for msg in output_messages:
+                        if isinstance(msg, dict) and msg.get("role") == "assistant":
+                            parts = msg.get("parts", [])
+                            for part in parts:
+                                if isinstance(part, dict) and part.get("type") == "tool_call":
+                                    tool_call_id = part.get("id")
+                                    if tool_call_id:
+                                        assert tool_call_id not in tool_call_ids, (
+                                            f"发现重复的 tool_call ID: {tool_call_id} in LLM span {i+1}. "
+                                            f"这表明同一个 tool_call 被记录了多次！"
+                                        )
+                                        tool_call_ids.append(tool_call_id)
+
+    print(f"\n✅ LLM span 验证通过")
+    print(f"  - LLM span 数量: {len(llm_spans)}")
+
+
+@pytest.mark.asyncio
+async def test_tool_span_correctness(instrument, span_exporter, tracer_provider):
+    """验证 Tool span 的正确性。
+    
+    验证内容：
+    1. Tool span 存在且数量正确
+    2. Tool span 是 Agent span 的子 span（不是 LLM span 的子 span）
+    3. Tool span 的属性正确（tool.name, tool.use.id, tool.input 等）
+    4. Tool span 包含正确的 output 和 is_error 状态
+    """
+    from opentelemetry.instrumentation.claude_agent_sdk.patch import (  # noqa: PLC0415
+        _process_agent_invocation_stream,
+    )
+    from opentelemetry.semconv._incubating.attributes import (  # noqa: PLC0415
+        gen_ai_attributes as GenAIAttributes,
+    )
+    from opentelemetry.util.genai.extended_handler import (  # noqa: PLC0415
+        ExtendedTelemetryHandler,
+    )
+
+    handler = ExtendedTelemetryHandler(tracer_provider=tracer_provider)
+    test_case = TEST_CASE_1_FOO_SH_COMMAND
+
+    mock_stream = create_mock_stream_from_messages(test_case["messages"])
+    
+    async for _ in _process_agent_invocation_stream(
+        wrapped_stream=mock_stream,
+        handler=handler,
+        model="qwen-plus",
+        prompt=test_case["prompt"],
+    ):
+        pass
+
+    spans = span_exporter.get_finished_spans()
+    agent_span = find_agent_span(spans)
+    tool_spans = find_tool_spans(spans)
+
+    # 验证 Tool span 存在且数量正确
+    # TEST_CASE_1 应该有 2 个 Tool spans（2 个 Bash 调用）
+    expected_tool_count = 2
+    assert len(tool_spans) == expected_tool_count, (
+        f"应该有 {expected_tool_count} 个 Tool span，实际: {len(tool_spans)}"
+    )
+
+    # 从测试数据中提取工具调用信息
+    tool_calls = []
+    for msg_data in test_case["messages"]:
+        if msg_data["type"] == "AssistantMessage":
+            for block in msg_data["content"]:
+                if block["type"] == "ToolUseBlock":
+                    tool_calls.append({
+                        "id": block["id"],
+                        "name": block["name"],
+                        "input": block["input"],
+                    })
+
+    # 从测试数据中提取工具结果信息
+    tool_results = {}
+    for msg_data in test_case["messages"]:
+        if msg_data["type"] == "UserMessage":
+            for block in msg_data["content"]:
+                if block["type"] == "ToolResultBlock":
+                    tool_results[block["tool_use_id"]] = {
+                        "content": block["content"],
+                        "is_error": block["is_error"],
+                    }
+
+    # 验证每个 Tool span
+    for tool_span in tool_spans:
+        attrs = dict(tool_span.attributes or {})
+
+        # 验证 Tool span 是 Agent span 的子 span（不是 LLM span 的子 span）
+        assert tool_span.parent is not None, "Tool span 应该有 parent"
+        assert tool_span.parent.span_id == agent_span.context.span_id, (
+            f"Tool span 的 parent 应该是 Agent span，"
+            f"实际 parent span_id: {tool_span.parent.span_id}, "
+            f"Agent span_id: {agent_span.context.span_id}"
+        )
+
+        # 验证基本属性
+        assert GenAIAttributes.GEN_AI_OPERATION_NAME in attrs, "Tool span 应该有 gen_ai.operation.name 属性"
+        assert attrs[GenAIAttributes.GEN_AI_OPERATION_NAME] == "execute_tool", (
+            f"gen_ai.operation.name 应该是 'execute_tool'，实际: {attrs.get(GenAIAttributes.GEN_AI_OPERATION_NAME)}"
+        )
+
+        assert GenAIAttributes.GEN_AI_TOOL_NAME in attrs, "Tool span 应该有 gen_ai.tool.name 属性"
+        tool_name = attrs[GenAIAttributes.GEN_AI_TOOL_NAME]
+        assert tool_name == "Bash", (
+            f"gen_ai.tool.name 应该是 'Bash'，实际: {tool_name}"
+        )
+
+        assert GenAIAttributes.GEN_AI_TOOL_CALL_ID in attrs, "Tool span 应该有 gen_ai.tool.call.id 属性"
+        tool_use_id = attrs[GenAIAttributes.GEN_AI_TOOL_CALL_ID]
+
+        # 验证 tool_use_id 在测试数据中存在
+        matching_tool_call = next(
+            (tc for tc in tool_calls if tc["id"] == tool_use_id),
+            None
+        )
+        assert matching_tool_call is not None, (
+            f"Tool span 的 tool_use_id '{tool_use_id}' 应该在测试数据中存在"
+        )
+
+        # 验证 tool.input（检查所有可能的属性名称）
+        tool_input_attrs = [
+            "gen_ai.tool.input",
+            "gen_ai.tool.call.arguments",
+            "gen_ai.tool.arguments",
+        ]
+        tool_input_found = False
+        for attr_name in tool_input_attrs:
+            if attr_name in attrs:
+                tool_input = attrs[attr_name]
+                expected_input = matching_tool_call["input"]
+                # tool_input 可能是 JSON 字符串，需要解析比较
+                import json
+                if isinstance(tool_input, str):
+                    try:
+                        tool_input_dict = json.loads(tool_input)
+                        assert str(expected_input["command"]) in str(tool_input_dict), (
+                            f"{attr_name} 应该包含命令，实际: {tool_input_dict}"
+                        )
+                        tool_input_found = True
+                        break
+                    except json.JSONDecodeError:
+                        # 如果不是 JSON，直接比较字符串
+                        if str(expected_input["command"]) in tool_input:
+                            tool_input_found = True
+                            break
+                elif isinstance(tool_input, dict):
+                    assert str(expected_input["command"]) in str(tool_input), (
+                        f"{attr_name} 应该包含命令，实际: {tool_input}"
+                    )
+                    tool_input_found = True
+                    break
+
+        # 验证 tool.output 和 is_error（如果存在）
+        if tool_use_id in tool_results:
+            tool_result = tool_results[tool_use_id]
+            
+            # 检查所有可能的 output 属性名称
+            tool_output_attrs = [
+                "gen_ai.tool.output",
+                "gen_ai.tool.result",
+                "gen_ai.tool.response",
+            ]
+            for attr_name in tool_output_attrs:
+                if attr_name in attrs:
+                    tool_output = attrs[attr_name]
+                    # tool_output 应该包含结果内容
+                    assert tool_result["content"] in str(tool_output), (
+                        f"{attr_name} 应该包含工具结果，实际: {tool_output}"
+                    )
+                    break
+
+            # 验证 is_error 状态
+            if tool_result["is_error"]:
+                # 如果工具调用失败，span 应该有错误状态（ERROR = 2）
+                assert tool_span.status.status_code.value in [2], (
+                    f"失败的 Tool span 应该有错误状态（ERROR），实际: {tool_span.status.status_code}"
+                )
+                # 验证错误属性
+                if "error.type" in attrs:
+                    assert attrs["error.type"] is not None, "失败的 Tool span 应该有 error.type 属性"
+            else:
+                # 如果工具调用成功，span 应该是 OK 状态（OK = 1）或 UNSET（UNSET = 0，表示没有错误）
+                assert tool_span.status.status_code.value in [0, 1], (
+                    f"成功的 Tool span 应该是 OK 或 UNSET 状态，实际: {tool_span.status.status_code}"
+                )
+
+    print(f"\n✅ Tool span 验证通过")
+    print(f"  - Tool span 数量: {len(tool_spans)}")
+    for i, tool_span in enumerate(tool_spans):
+        attrs = dict(tool_span.attributes or {})
+        tool_use_id = attrs.get(GenAIAttributes.GEN_AI_TOOL_CALL_ID)
+        tool_result = tool_results.get(tool_use_id, {})
+        print(f"  - Tool span {i+1}:")
+        print(f"    - 名称: {tool_span.name}")
+        print(f"    - Tool name: {attrs.get(GenAIAttributes.GEN_AI_TOOL_NAME)}")
+        print(f"    - Tool call ID: {tool_use_id}")
+        print(f"    - Is error: {tool_result.get('is_error', False)}")
+        print(f"    - Status: {tool_span.status.status_code}")
+
+
+@pytest.mark.asyncio
+async def test_span_hierarchy_correctness(instrument, span_exporter, tracer_provider):
+    """验证 span 层次结构的正确性。
+    
+    验证内容：
+    1. Agent span 是根 span
+    2. LLM span 是 Agent span 的子 span
+    3. Tool span 是 Agent span 的子 span（不是 LLM span 的子 span）
+    4. 所有 spans 的层次结构正确
+    """
+    from opentelemetry.instrumentation.claude_agent_sdk.patch import (  # noqa: PLC0415
+        _process_agent_invocation_stream,
+    )
+    from opentelemetry.util.genai.extended_handler import (  # noqa: PLC0415
+        ExtendedTelemetryHandler,
+    )
+
+    handler = ExtendedTelemetryHandler(tracer_provider=tracer_provider)
+    test_case = TEST_CASE_1_FOO_SH_COMMAND
+
+    mock_stream = create_mock_stream_from_messages(test_case["messages"])
+    
+    async for _ in _process_agent_invocation_stream(
+        wrapped_stream=mock_stream,
+        handler=handler,
+        model="qwen-plus",
+        prompt=test_case["prompt"],
+    ):
+        pass
+
+    spans = span_exporter.get_finished_spans()
+    agent_span = find_agent_span(spans)
+    llm_spans = find_llm_spans(spans)
+    tool_spans = find_tool_spans(spans)
+
+    # 验证 Agent span 是根 span
+    assert agent_span is not None, "应该有 Agent span"
+    assert agent_span.parent is None, "Agent span 应该是根 span，没有 parent"
+
+    # 验证 LLM span 是 Agent span 的子 span
+    assert len(llm_spans) > 0, "应该有至少一个 LLM span"
+    for llm_span in llm_spans:
+        assert llm_span.parent is not None, "LLM span 应该有 parent"
+        assert llm_span.parent.span_id == agent_span.context.span_id, (
+            f"LLM span 的 parent 应该是 Agent span"
+        )
+
+    # 验证 Tool span 是 Agent span 的子 span（不是 LLM span 的子 span）
+    assert len(tool_spans) > 0, "应该有至少一个 Tool span"
+    for tool_span in tool_spans:
+        assert tool_span.parent is not None, "Tool span 应该有 parent"
+        assert tool_span.parent.span_id == agent_span.context.span_id, (
+            f"Tool span 的 parent 应该是 Agent span，不是 LLM span"
+        )
+        # 确保 Tool span 不是 LLM span 的子 span
+        for llm_span in llm_spans:
+            assert tool_span.parent.span_id != llm_span.context.span_id, (
+                f"Tool span 不应该是 LLM span 的子 span"
+            )
+
+    print(f"\n✅ Span 层次结构验证通过")
+    print(f"  - Agent span: {agent_span.name} (根 span)")
+    print(f"  - LLM spans: {len(llm_spans)} 个（Agent 的子 span）")
+    print(f"  - Tool spans: {len(tool_spans)} 个（Agent 的子 span）")
+
+
+def validate_message_structure(messages, expected_structure):
+    """严格验证 messages 的结构是否符合期望。
+    
+    Args:
+        messages: 实际的 messages（JSON 字符串或列表）
+        expected_structure: 期望的结构定义
+        
+    Returns:
+        (bool, str): (是否匹配, 错误消息)
+    """
+    import json
+    
+    # 解析 messages
+    if isinstance(messages, str):
+        try:
+            messages = json.loads(messages)
+        except json.JSONDecodeError as e:
+            return False, f"JSON 解析失败: {e}"
+    
+    if not isinstance(messages, list):
+        return False, f"messages 应该是列表，实际: {type(messages)}"
+    
+    # 验证结构
+    if "roles" in expected_structure:
+        expected_roles = expected_structure["roles"]
+        actual_roles = [msg.get("role") for msg in messages if isinstance(msg, dict)]
+        if actual_roles != expected_roles:
+            return False, (
+                f"Role 顺序不匹配:\n"
+                f"  期望: {expected_roles}\n"
+                f"  实际: {actual_roles}"
+            )
+    
+    # 验证每个 message 的 parts 类型
+    if "parts_types" in expected_structure:
+        expected_parts_types = expected_structure["parts_types"]
+        for i, (msg, expected_types) in enumerate(zip(messages, expected_parts_types)):
+            if not isinstance(msg, dict):
+                continue
+            parts = msg.get("parts", [])
+            actual_types = [part.get("type") for part in parts if isinstance(part, dict)]
+            if actual_types != expected_types:
+                return False, (
+                    f"Message {i+1} 的 parts 类型不匹配:\n"
+                    f"  期望: {expected_types}\n"
+                    f"  实际: {actual_types}"
+                )
+    
+    return True, ""
+
+
+def match_span_to_expected(span, expected_span_def, parent_map):
+    """匹配实际 span 与期望的 span 定义。
+    
+    Args:
+        span: 实际的 span 对象
+        expected_span_def: 期望的 span 定义字典
+        parent_map: 父 span 的映射 {operation: span}
+        
+    Returns:
+        (bool, str): (是否匹配, 错误消息)
+    """
+    from opentelemetry.semconv._incubating.attributes import (  # noqa: PLC0415
+        gen_ai_attributes as GenAIAttributes,
+    )
+    
+    attrs = dict(span.attributes or {})
+    
+    # 验证名称模式
+    if "name_pattern" in expected_span_def:
+        if expected_span_def["name_pattern"] not in span.name:
+            return False, f"Span 名称不匹配: 期望包含 '{expected_span_def['name_pattern']}', 实际: '{span.name}'"
+    
+    # 验证 operation
+    if "operation" in expected_span_def:
+        if GenAIAttributes.GEN_AI_OPERATION_NAME not in attrs:
+            return False, f"缺少 gen_ai.operation.name 属性"
+        if attrs[GenAIAttributes.GEN_AI_OPERATION_NAME] != expected_span_def["operation"]:
+            return False, (
+                f"gen_ai.operation.name 不匹配: "
+                f"期望 '{expected_span_def['operation']}', "
+                f"实际 '{attrs[GenAIAttributes.GEN_AI_OPERATION_NAME]}'"
+            )
+    
+    # 验证父 span
+    if "parent" in expected_span_def:
+        expected_parent = expected_span_def["parent"]
+        if expected_parent is None:
+            if span.parent is not None:
+                return False, f"期望根 span，但实际有 parent"
+        else:
+            if span.parent is None:
+                return False, f"期望 parent 是 '{expected_parent}'，但实际是根 span"
+            elif expected_parent in parent_map:
+                expected_parent_span = parent_map[expected_parent]
+                if span.parent.span_id != expected_parent_span.context.span_id:
+                    return False, (
+                        f"Parent span 不匹配: "
+                        f"期望 '{expected_parent}' (span_id={expected_parent_span.context.span_id}), "
+                        f"实际 span_id={span.parent.span_id}"
+                    )
+    
+    # 验证精确匹配的属性
+    if "attributes" in expected_span_def:
+        for attr_key, expected_value in expected_span_def["attributes"].items():
+            if attr_key not in attrs:
+                return False, f"缺少属性: {attr_key}"
+            if attrs[attr_key] != expected_value:
+                return False, (
+                    f"属性 {attr_key} 不匹配: "
+                    f"期望 {expected_value!r}, 实际 {attrs[attr_key]!r}"
+                )
+    
+    # 验证包含匹配的属性（值包含指定字符串或列表）
+    if "attributes_contains" in expected_span_def:
+        for attr_key, expected_value in expected_span_def["attributes_contains"].items():
+            if attr_key not in attrs:
+                return False, f"缺少属性: {attr_key}"
+            attr_value = attrs[attr_key]
+            
+            # 如果期望值是列表，验证每个元素
+            if isinstance(expected_value, list):
+                attr_value_str = str(attr_value)
+                for expected_item in expected_value:
+                    if expected_item not in attr_value_str:
+                        return False, (
+                            f"属性 {attr_key} 不包含期望内容: "
+                            f"期望包含 '{expected_item}', 实际: {attr_value_str[:200]}..."
+                        )
+            else:
+                # 字符串匹配
+                attr_value_str = str(attr_value)
+                if expected_value not in attr_value_str:
+                    return False, (
+                        f"属性 {attr_key} 不包含期望内容: "
+                        f"期望包含 '{expected_value}', 实际: {attr_value_str[:200]}..."
+                    )
+    
+    # 验证消息结构（如果定义了 message_structure）
+    if "message_structure" in expected_span_def:
+        for attr_key, expected_structure in expected_span_def["message_structure"].items():
+            if attr_key not in attrs:
+                return False, f"缺少属性: {attr_key}"
+            is_valid, error_msg = validate_message_structure(attrs[attr_key], expected_structure)
+            if not is_valid:
+                return False, f"属性 {attr_key} 结构验证失败: {error_msg}"
+    
+    # 验证错误状态
+    if "is_error" in expected_span_def:
+        expected_is_error = expected_span_def["is_error"]
+        actual_status = span.status.status_code.value
+        if expected_is_error:
+            # 期望错误状态（ERROR = 2）
+            if actual_status != 2:
+                return False, (
+                    f"期望错误状态，但实际状态码是 {actual_status}"
+                )
+        else:
+            # 期望成功状态（OK = 1 或 UNSET = 0）
+            if actual_status not in [0, 1]:
+                return False, (
+                    f"期望成功状态，但实际状态码是 {actual_status}"
+                )
+    
+    return True, ""
+
+
+@pytest.mark.asyncio
+@pytest.mark.parametrize(
+    "test_case",
+    [
+        pytest.param(TEST_CASE_1_FOO_SH_COMMAND, id="foo_sh_command"),
+        pytest.param(TEST_CASE_2_ECHO_COMMAND, id="echo_command"),
+        pytest.param(TEST_CASE_3_PRETOOLUSE, id="pretooluse_hook"),
+    ],
+)
+async def test_all_spans_match_expected(
+    instrument, span_exporter, tracer_provider, test_case
+):
+    """验证实际生成的 spans 与 expected_spans 完全匹配。
+    
+    这个测试验证：
+    1. 生成的 spans 数量与 expected_spans 一致
+    2. 每个 span 的名称、操作类型、父 span 都匹配
+    3. 每个 span 的属性都完全匹配 expected_spans 中的定义
+    4. Span 的层次结构正确
+    """
+    from opentelemetry.instrumentation.claude_agent_sdk.patch import (  # noqa: PLC0415
+        _process_agent_invocation_stream,
+    )
+    from opentelemetry.semconv._incubating.attributes import (  # noqa: PLC0415
+        gen_ai_attributes as GenAIAttributes,
+    )
+    from opentelemetry.util.genai.extended_handler import (  # noqa: PLC0415
+        ExtendedTelemetryHandler,
+    )
+
+    handler = ExtendedTelemetryHandler(tracer_provider=tracer_provider)
+    expected_spans = test_case.get("expected_spans", [])
+
+    if not expected_spans:
+        pytest.skip(f"测试用例 '{test_case['description']}' 没有定义 expected_spans")
+
+    mock_stream = create_mock_stream_from_messages(test_case["messages"])
+    
+    async for _ in _process_agent_invocation_stream(
+        wrapped_stream=mock_stream,
+        handler=handler,
+        model="qwen-plus",
+        prompt=test_case["prompt"],
+    ):
+        pass
+
+    spans = span_exporter.get_finished_spans()
+    
+    # 构建父 span 映射（按 operation 名称）
+    parent_map = {}
+    for span in spans:
+        attrs = dict(span.attributes or {})
+        if GenAIAttributes.GEN_AI_OPERATION_NAME in attrs:
+            operation = attrs[GenAIAttributes.GEN_AI_OPERATION_NAME]
+            parent_map[operation] = span
+    
+    # 验证 spans 数量
+    assert len(spans) == len(expected_spans), (
+        f"生成的 spans 数量不匹配: "
+        f"期望 {len(expected_spans)} 个，实际 {len(spans)} 个"
+    )
+    
+    # 按 operation 类型分组 spans
+    spans_by_operation = {}
+    for span in spans:
+        attrs = dict(span.attributes or {})
+        if GenAIAttributes.GEN_AI_OPERATION_NAME in attrs:
+            operation = attrs[GenAIAttributes.GEN_AI_OPERATION_NAME]
+            if operation not in spans_by_operation:
+                spans_by_operation[operation] = []
+            spans_by_operation[operation].append(span)
+    
+    # 验证每个期望的 span
+    operation_index_map = {}  # 跟踪每个 operation 类型的索引
+    for i, expected_span_def in enumerate(expected_spans):
+        expected_operation = expected_span_def.get("operation")
+        
+        if expected_operation not in spans_by_operation:
+            pytest.fail(
+                f"期望的 span #{i+1} (operation={expected_operation}) 不存在于生成的 spans 中"
+            )
+        
+        # 获取对应 operation 类型的 span（按顺序）
+        if expected_operation not in operation_index_map:
+            operation_index_map[expected_operation] = 0
+        
+        operation_index = operation_index_map[expected_operation]
+        if operation_index >= len(spans_by_operation[expected_operation]):
+            pytest.fail(
+                f"期望的 span #{i+1} (operation={expected_operation}) "
+                f"超出了该类型的实际数量 ({len(spans_by_operation[expected_operation])})"
+            )
+        
+        actual_span = spans_by_operation[expected_operation][operation_index]
+        operation_index_map[expected_operation] += 1
+        
+        # 匹配 span
+        is_match, error_msg = match_span_to_expected(actual_span, expected_span_def, parent_map)
+        assert is_match, (
+            f"Span #{i+1} (operation={expected_operation}) 不匹配:\n"
+            f"  {error_msg}\n"
+            f"  Span 名称: {actual_span.name}\n"
+            f"  Span 属性: {dict(actual_span.attributes or {})}"
+        )
+    
+    print(f"\n✅ 测试用例 '{test_case['description']}' 的所有 spans 验证通过")
+    print(f"  - 总 spans 数: {len(spans)}")
+    for operation, span_list in spans_by_operation.items():
+        print(f"  - {operation} spans: {len(span_list)} 个")
diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/test_mocks.py b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/test_mocks.py
index 62f37f31d..af137dfa1 100644
--- a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/test_mocks.py
+++ b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/test_mocks.py
@@ -1,6 +1,5 @@
 """Mock-based tests for Claude Agent SDK instrumentation."""
 
-import threading
 from unittest.mock import Mock, patch
 
 import pytest
@@ -8,16 +7,10 @@
 from opentelemetry.instrumentation.claude_agent_sdk import (
     ClaudeAgentSDKInstrumentor,
 )
-from opentelemetry.instrumentation.claude_agent_sdk.context import (
-    clear_parent_invocation,
-    get_parent_invocation,
-    set_parent_invocation,
-)
 from opentelemetry.instrumentation.claude_agent_sdk.utils import (
     extract_usage_from_result_message,
     extract_usage_metadata,
     sum_anthropic_tokens,
-    truncate_value,
 )
 from opentelemetry.semconv._incubating.attributes import (
     gen_ai_attributes as GenAIAttributes,
@@ -135,57 +128,6 @@ def test_utils_sum_tokens_with_cache_details(instrument):
     assert result2["output_tokens"] == 50
 
 
-def test_utils_smart_truncate_edge_cases(instrument):
-    """Test smart truncate with various edge cases."""
-    # Empty string
-    assert truncate_value("") == ""
-
-    # None
-    assert truncate_value(None) == "None"
-
-    # Numbers
-    assert truncate_value(42) == "42"
-    assert truncate_value(3.14) == "3.14"
-
-    # Boolean
-    assert truncate_value(True) == "True"
-
-    # Empty list
-    assert truncate_value([]) == "[]"
-
-    # Empty dict
-    assert truncate_value({}) == "{}"
-
-    # Nested structures
-    nested = {"a": {"b": {"c": [1, 2, 3]}}}
-    result = truncate_value(nested)
-    assert isinstance(result, str)
-    assert "{" in result
-
-
-def test_context_thread_safety(instrument):
-    """Test context operations are thread-safe."""
-    results = []
-
-    def thread_func(value):
-        set_parent_invocation(value)
-        retrieved = get_parent_invocation()
-        results.append(retrieved == value)
-        clear_parent_invocation()
-
-    threads = []
-    for i in range(5):
-        t = threading.Thread(target=thread_func, args=(f"invocation_{i}",))
-        threads.append(t)
-        t.start()
-
-    for t in threads:
-        t.join()
-
-    # Each thread should have retrieved its own value
-    assert all(results)
-
-
 def test_instrumentor_double_instrument(instrument, tracer_provider):
     """Test that double instrumentation doesn't cause issues."""
     # First instrumentation already done by fixture
diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/test_span_validation.py b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/test_span_validation.py
new file mode 100644
index 000000000..cb08b4c0e
--- /dev/null
+++ b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/test_span_validation.py
@@ -0,0 +1,414 @@
+"""Specific validation tests for Claude Agent SDK instrumentation.
+
+These tests provide detailed validation for specific aspects of the instrumentation:
+- Agent span attributes and structure
+- LLM span input/output messages
+- Tool span attributes and results
+- Span hierarchy and timeline
+"""
+
+import pytest
+from pathlib import Path
+from typing import Any, Dict, List
+import yaml
+
+from opentelemetry.sdk.trace import TracerProvider
+from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter
+from opentelemetry.sdk.trace.export import SimpleSpanProcessor
+
+from test_message_flow_cases import (
+    create_mock_stream_from_messages,
+)
+
+
+# ============================================================================
+# Helper Functions
+# ============================================================================
+
+
+def load_cassette(filename: str) -> Dict[str, Any]:
+    """Load a test case from cassettes directory."""
+    cassette_path = Path(__file__).parent / "cassettes" / filename
+    with open(cassette_path, 'r', encoding='utf-8') as f:
+        return yaml.safe_load(f)
+
+
+def find_agent_span(spans):
+    """Find the Agent span."""
+    from opentelemetry.semconv._incubating.attributes import (  # noqa: PLC0415
+        gen_ai_attributes as GenAIAttributes,
+    )
+    for span in spans:
+        attrs = dict(span.attributes or {})
+        if attrs.get(GenAIAttributes.GEN_AI_OPERATION_NAME) == "invoke_agent":
+            return span
+    return None
+
+
+def find_llm_spans(spans):
+    """Find all LLM spans."""
+    from opentelemetry.semconv._incubating.attributes import (  # noqa: PLC0415
+        gen_ai_attributes as GenAIAttributes,
+    )
+    return [
+        s for s in spans
+        if dict(s.attributes or {}).get(GenAIAttributes.GEN_AI_OPERATION_NAME) == "chat"
+    ]
+
+
+def find_tool_spans(spans):
+    """Find all Tool spans."""
+    from opentelemetry.semconv._incubating.attributes import (  # noqa: PLC0415
+        gen_ai_attributes as GenAIAttributes,
+    )
+    return [
+        s for s in spans
+        if dict(s.attributes or {}).get(GenAIAttributes.GEN_AI_OPERATION_NAME) == "execute_tool"
+    ]
+
+
+# ============================================================================
+# Fixtures
+# ============================================================================
+
+
+@pytest.fixture
+def tracer_provider():
+    """Create a tracer provider for testing."""
+    return TracerProvider()
+
+
+@pytest.fixture
+def span_exporter(tracer_provider):
+    """Create an in-memory span exporter."""
+    exporter = InMemorySpanExporter()
+    tracer_provider.add_span_processor(SimpleSpanProcessor(exporter))
+    return exporter
+
+
+@pytest.fixture
+def instrument(tracer_provider):
+    """Instrument the Claude Agent SDK."""
+    from opentelemetry.instrumentation.claude_agent_sdk import (  # noqa: PLC0415
+        ClaudeAgentSDKInstrumentor,
+    )
+    
+    instrumentor = ClaudeAgentSDKInstrumentor()
+    instrumentor.instrument(tracer_provider=tracer_provider)
+    yield instrumentor
+    instrumentor.uninstrument()
+
+
+# ============================================================================
+# Tests - Agent Span
+# ============================================================================
+
+
+@pytest.mark.asyncio
+@pytest.mark.parametrize("cassette_file", [
+    "test_foo_sh_command.yaml",
+    "test_echo_command.yaml",
+    "test_pretooluse_hook.yaml",
+])
+async def test_agent_span_correctness(
+    cassette_file, instrument, span_exporter, tracer_provider
+):
+    """验证 Agent span 的正确性。
+    
+    验证内容：
+    1. Agent span 存在且唯一
+    2. Agent span 是根 span（没有 parent）
+    3. Agent span 包含正确的属性（operation.name, agent.name 等）
+    4. Agent span 包含 token 使用统计
+    """
+    from opentelemetry.instrumentation.claude_agent_sdk.patch import (  # noqa: PLC0415
+        _process_agent_invocation_stream,
+    )
+    from opentelemetry.semconv._incubating.attributes import (  # noqa: PLC0415
+        gen_ai_attributes as GenAIAttributes,
+    )
+    from opentelemetry.util.genai.extended_handler import (  # noqa: PLC0415
+        ExtendedTelemetryHandler,
+    )
+    
+    test_case = load_cassette(cassette_file)
+    handler = ExtendedTelemetryHandler(tracer_provider=tracer_provider)
+    mock_stream = create_mock_stream_from_messages(test_case["messages"])
+    
+    async for _ in _process_agent_invocation_stream(
+        wrapped_stream=mock_stream,
+        handler=handler,
+        model="qwen-plus",
+        prompt=test_case["prompt"],
+    ):
+        pass
+    
+    spans = span_exporter.get_finished_spans()
+    agent_span = find_agent_span(spans)
+    
+    # 验证 Agent span 存在且唯一
+    agent_spans = [
+        s for s in spans
+        if dict(s.attributes or {}).get(GenAIAttributes.GEN_AI_OPERATION_NAME) == "invoke_agent"
+    ]
+    assert len(agent_spans) == 1, f"应该有且仅有一个 Agent span，实际: {len(agent_spans)}"
+    
+    # 验证是根 span
+    assert agent_span.parent is None, "Agent span 应该是根 span，没有 parent"
+    
+    # 验证必需属性
+    attrs = dict(agent_span.attributes or {})
+    assert GenAIAttributes.GEN_AI_OPERATION_NAME in attrs
+    assert attrs[GenAIAttributes.GEN_AI_OPERATION_NAME] == "invoke_agent"
+    
+    # 验证包含 token 使用统计
+    assert GenAIAttributes.GEN_AI_USAGE_INPUT_TOKENS in attrs, "应该有 input_tokens"
+    assert GenAIAttributes.GEN_AI_USAGE_OUTPUT_TOKENS in attrs, "应该有 output_tokens"
+    
+    print(f"\n✅ Agent span 验证通过 ({cassette_file})")
+    print(f"  - Span 名称: {agent_span.name}")
+    print(f"  - Input tokens: {attrs.get(GenAIAttributes.GEN_AI_USAGE_INPUT_TOKENS)}")
+    print(f"  - Output tokens: {attrs.get(GenAIAttributes.GEN_AI_USAGE_OUTPUT_TOKENS)}")
+
+
+# ============================================================================
+# Tests - LLM Span
+# ============================================================================
+
+
+@pytest.mark.asyncio
+@pytest.mark.parametrize("cassette_file", [
+    "test_foo_sh_command.yaml",
+    "test_echo_command.yaml",
+    "test_pretooluse_hook.yaml",
+])
+async def test_llm_span_correctness(
+    cassette_file, instrument, span_exporter, tracer_provider
+):
+    """验证 LLM span 的正确性。
+    
+    验证内容：
+    1. LLM span 存在且数量正确
+    2. LLM span 是 Agent span 的子 span
+    3. LLM span 的属性正确（model, provider, operation 等）
+    4. LLM span 的 output.messages 中 tool_call.id 唯一（无重复）
+    """
+    from opentelemetry.instrumentation.claude_agent_sdk.patch import (  # noqa: PLC0415
+        _process_agent_invocation_stream,
+    )
+    from opentelemetry.semconv._incubating.attributes import (  # noqa: PLC0415
+        gen_ai_attributes as GenAIAttributes,
+    )
+    from opentelemetry.util.genai.extended_handler import (  # noqa: PLC0415
+        ExtendedTelemetryHandler,
+    )
+    
+    test_case = load_cassette(cassette_file)
+    handler = ExtendedTelemetryHandler(tracer_provider=tracer_provider)
+    mock_stream = create_mock_stream_from_messages(test_case["messages"])
+    
+    async for _ in _process_agent_invocation_stream(
+        wrapped_stream=mock_stream,
+        handler=handler,
+        model="qwen-plus",
+        prompt=test_case["prompt"],
+    ):
+        pass
+    
+    spans = span_exporter.get_finished_spans()
+    agent_span = find_agent_span(spans)
+    llm_spans = find_llm_spans(spans)
+    
+    # 验证 LLM span 存在
+    assert len(llm_spans) > 0, "应该有至少一个 LLM span"
+    
+    # 验证所有 LLM span 是 Agent span 的子 span
+    for llm_span in llm_spans:
+        assert llm_span.parent is not None, "LLM span 应该有 parent"
+        assert llm_span.parent.span_id == agent_span.context.span_id, (
+            "LLM span 的 parent 应该是 Agent span"
+        )
+        
+        # 验证基本属性
+        attrs = dict(llm_span.attributes or {})
+        assert attrs.get(GenAIAttributes.GEN_AI_OPERATION_NAME) == "chat"
+        assert GenAIAttributes.GEN_AI_REQUEST_MODEL in attrs
+        
+        # 验证 output.messages 中 tool_call.id 唯一性
+        if GenAIAttributes.GEN_AI_OUTPUT_MESSAGES in attrs:
+            import json
+            output_messages_raw = attrs[GenAIAttributes.GEN_AI_OUTPUT_MESSAGES]
+            if isinstance(output_messages_raw, str):
+                output_messages = json.loads(output_messages_raw)
+            else:
+                output_messages = output_messages_raw
+            
+            if isinstance(output_messages, list):
+                tool_call_ids = []
+                for msg in output_messages:
+                    if isinstance(msg, dict) and msg.get("role") == "assistant":
+                        parts = msg.get("parts", [])
+                        for part in parts:
+                            if isinstance(part, dict) and part.get("type") == "tool_call":
+                                tool_call_id = part.get("id")
+                                if tool_call_id:
+                                    assert tool_call_id not in tool_call_ids, (
+                                        f"发现重复的 tool_call ID: {tool_call_id}"
+                                    )
+                                    tool_call_ids.append(tool_call_id)
+    
+    print(f"\n✅ LLM span 验证通过 ({cassette_file})")
+    print(f"  - LLM span 数量: {len(llm_spans)}")
+
+
+# ============================================================================
+# Tests - Tool Span
+# ============================================================================
+
+
+@pytest.mark.asyncio
+@pytest.mark.parametrize("cassette_file", [
+    "test_foo_sh_command.yaml",
+    "test_echo_command.yaml",
+    "test_pretooluse_hook.yaml",
+])
+async def test_tool_span_correctness(
+    cassette_file, instrument, span_exporter, tracer_provider
+):
+    """验证 Tool span 的正确性。
+    
+    验证内容：
+    1. Tool span 存在且数量正确
+    2. Tool span 是 Agent span 的子 span（不是 LLM span）
+    3. Tool span 的属性正确（tool.name, tool.call.id, arguments, result 等）
+    4. Tool span 包含正确的 is_error 状态
+    """
+    from opentelemetry.instrumentation.claude_agent_sdk.patch import (  # noqa: PLC0415
+        _process_agent_invocation_stream,
+    )
+    from opentelemetry.semconv._incubating.attributes import (  # noqa: PLC0415
+        gen_ai_attributes as GenAIAttributes,
+    )
+    from opentelemetry.util.genai.extended_handler import (  # noqa: PLC0415
+        ExtendedTelemetryHandler,
+    )
+    
+    test_case = load_cassette(cassette_file)
+    handler = ExtendedTelemetryHandler(tracer_provider=tracer_provider)
+    mock_stream = create_mock_stream_from_messages(test_case["messages"])
+    
+    async for _ in _process_agent_invocation_stream(
+        wrapped_stream=mock_stream,
+        handler=handler,
+        model="qwen-plus",
+        prompt=test_case["prompt"],
+    ):
+        pass
+    
+    spans = span_exporter.get_finished_spans()
+    agent_span = find_agent_span(spans)
+    llm_spans = find_llm_spans(spans)
+    tool_spans = find_tool_spans(spans)
+    
+    # 验证 Tool span 存在
+    assert len(tool_spans) > 0, "应该有至少一个 Tool span"
+    
+    # 验证所有 Tool span 是 Agent span 的子 span（不是 LLM span）
+    for tool_span in tool_spans:
+        assert tool_span.parent is not None, "Tool span 应该有 parent"
+        assert tool_span.parent.span_id == agent_span.context.span_id, (
+            "Tool span 的 parent 应该是 Agent span，不是 LLM span"
+        )
+        
+        # 确保不是 LLM span 的子 span
+        for llm_span in llm_spans:
+            assert tool_span.parent.span_id != llm_span.context.span_id, (
+                "Tool span 不应该是 LLM span 的子 span"
+            )
+        
+        # 验证基本属性
+        attrs = dict(tool_span.attributes or {})
+        assert attrs.get(GenAIAttributes.GEN_AI_OPERATION_NAME) == "execute_tool"
+        assert GenAIAttributes.GEN_AI_TOOL_NAME in attrs, "应该有 tool.name"
+        assert GenAIAttributes.GEN_AI_TOOL_CALL_ID in attrs, "应该有 tool.call.id"
+    
+    print(f"\n✅ Tool span 验证通过 ({cassette_file})")
+    print(f"  - Tool span 数量: {len(tool_spans)}")
+
+
+# ============================================================================
+# Tests - Span Hierarchy
+# ============================================================================
+
+
+@pytest.mark.asyncio
+@pytest.mark.parametrize("cassette_file", [
+    "test_foo_sh_command.yaml",
+    "test_echo_command.yaml",
+    "test_pretooluse_hook.yaml",
+])
+async def test_span_hierarchy_correctness(
+    cassette_file, instrument, span_exporter, tracer_provider
+):
+    """验证 Span 层次结构的正确性。
+    
+    验证内容：
+    1. Agent span 是根 span
+    2. LLM span 是 Agent span 的子 span
+    3. Tool span 是 Agent span 的子 span（不是 LLM span）
+    4. Span 的时间线是串行的（LLM → Tool → LLM）
+    """
+    from opentelemetry.instrumentation.claude_agent_sdk.patch import (  # noqa: PLC0415
+        _process_agent_invocation_stream,
+    )
+    from opentelemetry.util.genai.extended_handler import (  # noqa: PLC0415
+        ExtendedTelemetryHandler,
+    )
+    
+    test_case = load_cassette(cassette_file)
+    handler = ExtendedTelemetryHandler(tracer_provider=tracer_provider)
+    mock_stream = create_mock_stream_from_messages(test_case["messages"])
+    
+    async for _ in _process_agent_invocation_stream(
+        wrapped_stream=mock_stream,
+        handler=handler,
+        model="qwen-plus",
+        prompt=test_case["prompt"],
+    ):
+        pass
+    
+    spans = span_exporter.get_finished_spans()
+    agent_span = find_agent_span(spans)
+    llm_spans = find_llm_spans(spans)
+    tool_spans = find_tool_spans(spans)
+    
+    # 验证 Agent span 是根 span
+    assert agent_span is not None, "应该有 Agent span"
+    assert agent_span.parent is None, "Agent span 应该是根 span"
+    
+    # 验证 LLM span 是 Agent span 的子 span
+    assert len(llm_spans) > 0, "应该有至少一个 LLM span"
+    for llm_span in llm_spans:
+        assert llm_span.parent is not None, "LLM span 应该有 parent"
+        assert llm_span.parent.span_id == agent_span.context.span_id, (
+            "LLM span 的 parent 应该是 Agent span"
+        )
+    
+    # 验证 Tool span 是 Agent span 的子 span
+    assert len(tool_spans) > 0, "应该有至少一个 Tool span"
+    for tool_span in tool_spans:
+        assert tool_span.parent is not None, "Tool span 应该有 parent"
+        assert tool_span.parent.span_id == agent_span.context.span_id, (
+            "Tool span 的 parent 应该是 Agent span"
+        )
+        
+        # 确保不是 LLM span 的子 span
+        for llm_span in llm_spans:
+            assert tool_span.parent.span_id != llm_span.context.span_id, (
+                "Tool span 不应该是 LLM span 的子 span"
+            )
+    
+    print(f"\n✅ Span 层次结构验证通过 ({cassette_file})")
+    print(f"  - Agent span: {agent_span.name} (根 span)")
+    print(f"  - LLM spans: {len(llm_spans)} 个（Agent 的子 span）")
+    print(f"  - Tool spans: {len(tool_spans)} 个（Agent 的子 span）")
diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/test_task_tool.py b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/test_task_tool.py
new file mode 100644
index 000000000..e501357a3
--- /dev/null
+++ b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/test_task_tool.py
@@ -0,0 +1,247 @@
+"""Test Task tool specific behavior: span hierarchy and message filtering."""
+
+import json
+from pathlib import Path
+from typing import Any, AsyncIterator, Dict, List
+from unittest.mock import MagicMock
+
+import pytest
+import yaml
+
+
+def load_cassette(filename: str):
+    """Load cassette file from tests/cassettes directory."""
+    cassette_path = Path(__file__).parent / "cassettes" / filename
+    with open(cassette_path, "r", encoding="utf-8") as f:
+        return yaml.safe_load(f)
+
+
+def create_mock_message_from_data(message_data: Dict[str, Any]) -> Any:
+    """Create a mock message object from cassette data."""
+    mock_msg = MagicMock()
+    mock_msg.__class__.__name__ = message_data["type"]
+    
+    # 基本属性
+    mock_msg.parent_tool_use_id = message_data.get("parent_tool_use_id")
+    
+    if message_data["type"] == "SystemMessage":
+        mock_msg.subtype = message_data.get("subtype")
+        if "data" in message_data:
+            for key, value in message_data["data"].items():
+                setattr(mock_msg, key, value)
+    
+    elif message_data["type"] == "AssistantMessage":
+        mock_msg.model = message_data.get("model")
+        mock_msg.error = message_data.get("error")
+        mock_msg.content = []
+        
+        if "content" in message_data:
+            for block in message_data["content"]:
+                mock_block = MagicMock()
+                mock_block.__class__.__name__ = block["type"]
+                
+                if block["type"] == "TextBlock":
+                    mock_block.text = block.get("text", "")
+                elif block["type"] == "ToolUseBlock":
+                    mock_block.id = block.get("id")
+                    mock_block.name = block.get("name")
+                    mock_block.input = block.get("input", {})
+                
+                mock_msg.content.append(mock_block)
+    
+    elif message_data["type"] == "UserMessage":
+        mock_msg.uuid = message_data.get("uuid")
+        mock_msg.content = []
+        
+        if "content" in message_data:
+            for block in message_data["content"]:
+                mock_block = MagicMock()
+                mock_block.__class__.__name__ = block["type"]
+                
+                if block["type"] == "ToolResultBlock":
+                    mock_block.tool_use_id = block.get("tool_use_id")
+                    mock_block.content = block.get("content")
+                    mock_block.is_error = block.get("is_error", False)
+                elif block["type"] == "TextBlock":
+                    mock_block.text = block.get("text", "")
+                
+                mock_msg.content.append(mock_block)
+    
+    elif message_data["type"] == "ResultMessage":
+        mock_msg.subtype = message_data.get("subtype")
+        mock_msg.duration_ms = message_data.get("duration_ms")
+        mock_msg.duration_api_ms = message_data.get("duration_api_ms")
+        mock_msg.is_error = message_data.get("is_error", False)
+        mock_msg.num_turns = message_data.get("num_turns")
+        mock_msg.session_id = message_data.get("session_id")
+        mock_msg.total_cost_usd = message_data.get("total_cost_usd")
+        mock_msg.usage = message_data.get("usage")
+        mock_msg.result = message_data.get("result")
+        mock_msg.structured_output = message_data.get("structured_output")
+    
+    return mock_msg
+
+
+async def create_mock_stream_from_messages(
+    messages: List[Dict[str, Any]]
+) -> AsyncIterator[Any]:
+    """Create mock async stream from message data."""
+    for message_data in messages:
+        yield create_mock_message_from_data(message_data)
+
+
+@pytest.mark.asyncio
+async def test_task_tool_span_hierarchy(instrument, span_exporter, tracer_provider):
+    """Test that Task tool creates proper span hierarchy with subagent spans as children."""
+    from opentelemetry.instrumentation.claude_agent_sdk.patch import (  # noqa: PLC0415
+        _process_agent_invocation_stream,
+    )
+    from opentelemetry.util.genai.extended_handler import (  # noqa: PLC0415
+        ExtendedTelemetryHandler,
+    )
+    
+    cassette = load_cassette("test_task_tool.yaml")
+    handler = ExtendedTelemetryHandler(tracer_provider=tracer_provider)
+    mock_stream = create_mock_stream_from_messages(cassette["messages"])
+    
+    async for _ in _process_agent_invocation_stream(
+        wrapped_stream=mock_stream,
+        handler=handler,
+        model="qwen-plus",
+        prompt=cassette["prompt"],
+    ):
+        pass
+    
+    spans = span_exporter.get_finished_spans()
+    
+    # Expected spans:
+    # 1. invoke_agent (root)
+    # 2. LLM₁ chat (main agent decides to use Task)
+    # 3. Task execute_tool (parent for subagent work)
+    # 4. LLM₂ chat (inside Task - child of Task span)
+    # 5. Read execute_tool (inside Task - child of Task span)
+    # 6. LLM₃ chat (inside Task - child of Task span)
+    # 7. LLM₄ chat (main agent summarizes)
+    
+    assert len(spans) >= 7, f"Expected at least 7 spans, got {len(spans)}"
+    
+    # Find spans by operation
+    agent_spans = [s for s in spans if s.attributes.get("gen_ai.operation.name") == "invoke_agent"]
+    llm_spans = [s for s in spans if s.attributes.get("gen_ai.operation.name") == "chat"]
+    tool_spans = [s for s in spans if s.attributes.get("gen_ai.operation.name") == "execute_tool"]
+    
+    assert len(agent_spans) == 1, f"Expected 1 agent span, got {len(agent_spans)}"
+    assert len(llm_spans) >= 4, f"Expected at least 4 LLM spans, got {len(llm_spans)}"
+    assert len(tool_spans) >= 2, f"Expected at least 2 tool spans (Task + Read), got {len(tool_spans)}"
+    
+    agent_span = agent_spans[0]
+    
+    # Find the Task tool span
+    task_spans = [s for s in tool_spans if s.attributes.get("gen_ai.tool.name") == "Task"]
+    assert len(task_spans) == 1, f"Expected 1 Task span, got {len(task_spans)}"
+    task_span = task_spans[0]
+    
+    # Verify Task span is child of agent span
+    assert task_span.parent is not None, "Task span should have a parent"
+    assert task_span.parent.span_id == agent_span.context.span_id, \
+        "Task span should be child of agent span"
+    
+    # Find the Read tool span (inside Task)
+    read_spans = [s for s in tool_spans if s.attributes.get("gen_ai.tool.name") == "Read"]
+    assert len(read_spans) == 1, f"Expected 1 Read span, got {len(read_spans)}"
+    read_span = read_spans[0]
+    
+    # Verify Read span is child of Task span
+    assert read_span.parent is not None, "Read span should have a parent"
+    assert read_span.parent.span_id == task_span.context.span_id, \
+        "Read span should be child of Task span (not agent span)"
+    
+    # Find LLM spans inside Task
+    # They should be children of Task span
+    task_llm_spans = [s for s in llm_spans if s.parent and s.parent.span_id == task_span.context.span_id]
+    
+    assert len(task_llm_spans) >= 2, \
+        f"Expected at least 2 LLM spans inside Task, got {len(task_llm_spans)}"
+
+
+@pytest.mark.asyncio
+async def test_task_tool_message_filtering(instrument, span_exporter, tracer_provider):
+    """Test that Task internal messages don't appear in parent LLM's input/output."""
+    from opentelemetry.instrumentation.claude_agent_sdk.patch import (  # noqa: PLC0415
+        _process_agent_invocation_stream,
+    )
+    from opentelemetry.util.genai.extended_handler import (  # noqa: PLC0415
+        ExtendedTelemetryHandler,
+    )
+    
+    cassette = load_cassette("test_task_tool.yaml")
+    handler = ExtendedTelemetryHandler(tracer_provider=tracer_provider)
+    mock_stream = create_mock_stream_from_messages(cassette["messages"])
+    
+    async for _ in _process_agent_invocation_stream(
+        wrapped_stream=mock_stream,
+        handler=handler,
+        model="qwen-plus",
+        prompt=cassette["prompt"],
+    ):
+        pass
+    
+    spans = span_exporter.get_finished_spans()
+    llm_spans = [s for s in spans if s.attributes.get("gen_ai.operation.name") == "chat"]
+    
+    # Find the last LLM span (LLM₄ - main agent summarizes after Task completes)
+    # This should be the LLM that receives the Task result
+    last_llm_span = llm_spans[-1]
+    
+    # Get input messages
+    input_messages_str = last_llm_span.attributes.get("gen_ai.input.messages")
+    assert input_messages_str is not None, "LLM span should have input.messages"
+    
+    try:
+        input_messages = json.loads(input_messages_str)
+    except (json.JSONDecodeError, TypeError):
+        input_messages = input_messages_str
+    
+    # The last LLM's input should contain:
+    # 1. User prompt
+    # 2. Assistant decision to use Task
+    # 3. Task tool_call
+    # 4. Task tool_call_response (result)
+    #
+    # It should NOT contain:
+    # - "I'll read the file first" (internal to Task)
+    # - Read tool call (internal to Task)
+    # - Read tool result (internal to Task)
+    # - "The code looks good..." (internal to Task)
+    
+    # Convert to string for easier checking
+    input_str = str(input_messages)
+    
+    # Should contain Task-level interactions
+    assert "Task" in input_str, "Should contain Task tool call"
+    assert "Code review completed" in input_str, "Should contain Task result"
+    
+    # Should NOT contain Task internal messages
+    assert "I'll read the file first" not in input_str, \
+        "Should NOT contain Task internal assistant message"
+    assert "call_read_001" not in input_str, \
+        "Should NOT contain Task internal Read tool call ID"
+    assert "class MyType" not in input_str, \
+        "Should NOT contain Task internal Read tool result"
+    assert "The code looks good" not in input_str, \
+        "Should NOT contain Task internal analysis text"
+    
+    # Get output messages
+    output_messages_str = last_llm_span.attributes.get("gen_ai.output.messages")
+    assert output_messages_str is not None, "LLM span should have output.messages"
+    
+    try:
+        output_messages = json.loads(output_messages_str)
+    except (json.JSONDecodeError, TypeError):
+        output_messages = output_messages_str
+    output_str = str(output_messages)
+    
+    # Output should be the final summary
+    assert "code-reviewer agent completed" in output_str.lower() or \
+           "analysis" in output_str.lower(), \
+        "Output should contain summary from main agent"
diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/test_unit.py b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/test_unit.py
index 70cc5b27a..9d3d629ae 100644
--- a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/test_unit.py
+++ b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/test_unit.py
@@ -5,16 +5,10 @@
 from opentelemetry.instrumentation.claude_agent_sdk import (
     ClaudeAgentSDKInstrumentor,
 )
-from opentelemetry.instrumentation.claude_agent_sdk.context import (
-    clear_parent_invocation,
-    get_parent_invocation,
-    set_parent_invocation,
-)
 from opentelemetry.instrumentation.claude_agent_sdk.utils import (
     extract_usage_metadata,
     infer_provider_from_base_url,
     sum_anthropic_tokens,
-    truncate_value,
 )
 from opentelemetry.sdk.trace import TracerProvider
 
@@ -47,47 +41,6 @@ def test_instrumentation_dependencies():
     assert "claude-agent-sdk" in deps[0]
 
 
-def test_utils_safe_truncate():
-    """Test truncate_value utility function."""
-    # Test short string
-    result = truncate_value("hello")
-    assert result == "hello"
-
-    # Test long string
-    long_str = "a" * 200
-    result = truncate_value(long_str, max_length=150)
-    assert len(result) <= 153  # 150 + "..."
-    assert result.endswith("...")
-
-    # Test list
-    result = truncate_value([1, 2, 3])
-    assert "[" in result
-    assert "]" in result
-
-    # Test dict
-    result = truncate_value({"key": "value"})
-    assert "{" in result
-    assert "}" in result
-
-
-def test_context_operations():
-    """Test thread-local context operations."""
-    # Initially should be None
-    assert get_parent_invocation() is None
-
-    # Set a mock invocation
-    mock_invocation = {"test": "value"}
-    set_parent_invocation(mock_invocation)
-
-    # Should retrieve the same object
-    retrieved = get_parent_invocation()
-    assert retrieved == mock_invocation
-
-    # Clear should remove it
-    clear_parent_invocation()
-    assert get_parent_invocation() is None
-
-
 def test_usage_extraction():
     """Test usage metadata extraction."""
     # Test with dict
diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/test_with_cassettes.py b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/test_with_cassettes.py
new file mode 100644
index 000000000..771309bfd
--- /dev/null
+++ b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/test_with_cassettes.py
@@ -0,0 +1,313 @@
+"""Tests for Claude Agent SDK instrumentation using cassette-based test data.
+
+This test module uses YAML cassettes (similar to dashscope instrumentation) to test
+the _process_agent_invocation_stream function with real message sequences.
+"""
+
+import pytest
+import yaml
+from pathlib import Path
+from typing import Any, AsyncIterator, Dict, List
+from unittest.mock import MagicMock
+
+from opentelemetry.sdk.trace import TracerProvider
+from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter
+from opentelemetry.sdk.trace.export import SimpleSpanProcessor
+
+
+# ============================================================================
+# Cassette Loading
+# ============================================================================
+
+
+def load_cassette(filename: str) -> Dict[str, Any]:
+    """Load test case from cassettes directory."""
+    cassette_path = Path(__file__).parent / "cassettes" / filename
+    
+    with open(cassette_path, 'r', encoding='utf-8') as f:
+        return yaml.safe_load(f)
+
+
+def get_all_cassettes() -> List[str]:
+    """Get all cassette file names."""
+    cassettes_dir = Path(__file__).parent / "cassettes"
+    return sorted([f.name for f in cassettes_dir.glob("test_*.yaml")])
+
+
+# ============================================================================
+# Helper Functions
+# ============================================================================
+
+
+def create_mock_message_from_data(message_data: Dict[str, Any]) -> Any:
+    """Create a mock message object from test data dictionary."""
+    mock_msg = MagicMock()
+    msg_type = message_data["type"]
+    
+    mock_msg.__class__.__name__ = msg_type
+    
+    if msg_type == "SystemMessage":
+        mock_msg.subtype = message_data["subtype"]
+        mock_msg.data = message_data["data"]
+        
+    elif msg_type == "AssistantMessage":
+        mock_msg.model = message_data["model"]
+        mock_msg.content = []
+        
+        for block_data in message_data["content"]:
+            mock_block = MagicMock()
+            block_type = block_data["type"]
+            mock_block.__class__.__name__ = block_type
+            
+            if block_type == "TextBlock":
+                mock_block.text = block_data["text"]
+            elif block_type == "ToolUseBlock":
+                mock_block.id = block_data["id"]
+                mock_block.name = block_data["name"]
+                mock_block.input = block_data["input"]
+            
+            mock_msg.content.append(mock_block)
+            
+        mock_msg.parent_tool_use_id = message_data.get("parent_tool_use_id")
+        mock_msg.error = message_data.get("error")
+        
+    elif msg_type == "UserMessage":
+        mock_msg.content = []
+        
+        for block_data in message_data["content"]:
+            mock_block = MagicMock()
+            mock_block.__class__.__name__ = block_data["type"]
+            
+            if block_data["type"] == "ToolResultBlock":
+                mock_block.tool_use_id = block_data["tool_use_id"]
+                mock_block.content = block_data["content"]
+                mock_block.is_error = block_data["is_error"]
+                
+            mock_msg.content.append(mock_block)
+            
+        mock_msg.uuid = message_data.get("uuid")
+        mock_msg.parent_tool_use_id = message_data.get("parent_tool_use_id")
+        
+    elif msg_type == "ResultMessage":
+        mock_msg.subtype = message_data["subtype"]
+        mock_msg.duration_ms = message_data["duration_ms"]
+        mock_msg.duration_api_ms = message_data.get("duration_api_ms")
+        mock_msg.is_error = message_data["is_error"]
+        mock_msg.num_turns = message_data["num_turns"]
+        mock_msg.session_id = message_data.get("session_id")
+        mock_msg.total_cost_usd = message_data["total_cost_usd"]
+        mock_msg.usage = message_data["usage"]
+        mock_msg.result = message_data["result"]
+        mock_msg.structured_output = message_data.get("structured_output")
+    
+    return mock_msg
+
+
+async def create_mock_stream_from_messages(
+    messages: List[Dict[str, Any]]
+) -> AsyncIterator[Any]:
+    """Create a mock async stream of messages."""
+    for message_data in messages:
+        yield create_mock_message_from_data(message_data)
+
+
+# ============================================================================
+# Fixtures
+# ============================================================================
+
+
+@pytest.fixture
+def tracer_provider():
+    """Create a tracer provider for testing."""
+    provider = TracerProvider()
+    return provider
+
+
+@pytest.fixture
+def span_exporter(tracer_provider):
+    """Create an in-memory span exporter."""
+    exporter = InMemorySpanExporter()
+    tracer_provider.add_span_processor(SimpleSpanProcessor(exporter))
+    return exporter
+
+
+@pytest.fixture
+def instrument(tracer_provider):
+    """Instrument the Claude Agent SDK."""
+    from opentelemetry.instrumentation.claude_agent_sdk import (  # noqa: PLC0415
+        ClaudeAgentSDKInstrumentor,
+    )
+    
+    instrumentor = ClaudeAgentSDKInstrumentor()
+    instrumentor.instrument(tracer_provider=tracer_provider)
+    yield instrumentor
+    instrumentor.uninstrument()
+
+
+# ============================================================================
+# Tests
+# ============================================================================
+
+
+@pytest.mark.asyncio
+@pytest.mark.parametrize("cassette_file", get_all_cassettes())
+async def test_agent_invocation_with_cassette(
+    cassette_file, instrument, span_exporter, tracer_provider
+):
+    """测试使用 cassette 数据的 agent invocation。
+    
+    这个测试：
+    1. 从 cassette 文件加载真实的消息序列
+    2. 使用 _process_agent_invocation_stream 处理消息
+    3. 验证生成的 spans 数量和基本属性
+    """
+    from opentelemetry.instrumentation.claude_agent_sdk.patch import (  # noqa: PLC0415
+        _process_agent_invocation_stream,
+    )
+    from opentelemetry.semconv._incubating.attributes import (  # noqa: PLC0415
+        gen_ai_attributes as GenAIAttributes,
+    )
+    from opentelemetry.util.genai.extended_handler import (  # noqa: PLC0415
+        ExtendedTelemetryHandler,
+    )
+    
+    # 加载 cassette
+    test_case = load_cassette(cassette_file)
+    
+    handler = ExtendedTelemetryHandler(tracer_provider=tracer_provider)
+    mock_stream = create_mock_stream_from_messages(test_case["messages"])
+    
+    # 处理消息流
+    async for _ in _process_agent_invocation_stream(
+        wrapped_stream=mock_stream,
+        handler=handler,
+        model="qwen-plus",
+        prompt=test_case["prompt"],
+    ):
+        pass
+    
+    # 验证生成的 spans
+    spans = span_exporter.get_finished_spans()
+    
+    # 基本验证
+    assert len(spans) > 0, f"应该生成至少一个 span for {cassette_file}"
+    
+    # 验证 Agent span 存在
+    agent_spans = [
+        s for s in spans
+        if dict(s.attributes or {}).get(GenAIAttributes.GEN_AI_OPERATION_NAME) == "invoke_agent"
+    ]
+    assert len(agent_spans) == 1, f"应该有一个 Agent span for {cassette_file}"
+    
+    # 验证 LLM spans 存在
+    llm_spans = [
+        s for s in spans
+        if dict(s.attributes or {}).get(GenAIAttributes.GEN_AI_OPERATION_NAME) == "chat"
+    ]
+    assert len(llm_spans) > 0, f"应该有至少一个 LLM span for {cassette_file}"
+    
+    print(f"\n✅ {cassette_file}: {len(spans)} spans (Agent: 1, LLM: {len(llm_spans)})")
+
+
+@pytest.mark.asyncio
+@pytest.mark.parametrize("cassette_file", get_all_cassettes())
+async def test_spans_match_expected(
+    cassette_file, instrument, span_exporter, tracer_provider
+):
+    """验证实际生成的 spans 与 expected_spans 完全匹配。
+    
+    这个测试验证：
+    1. 生成的 spans 数量与 expected_spans 一致
+    2. 每个 span 的名称、操作类型、父 span 都匹配
+    3. 每个 span 的属性都完全匹配 expected_spans 中的定义
+    4. Span 的层次结构正确
+    """
+    from opentelemetry.instrumentation.claude_agent_sdk.patch import (  # noqa: PLC0415
+        _process_agent_invocation_stream,
+    )
+    from opentelemetry.semconv._incubating.attributes import (  # noqa: PLC0415
+        gen_ai_attributes as GenAIAttributes,
+    )
+    from opentelemetry.util.genai.extended_handler import (  # noqa: PLC0415
+        ExtendedTelemetryHandler,
+    )
+    from test_message_flow_cases import (  # noqa: PLC0415
+        match_span_to_expected,
+    )
+    
+    # 加载 cassette
+    test_case = load_cassette(cassette_file)
+    expected_spans = test_case.get("expected_spans", [])
+    
+    if not expected_spans:
+        pytest.skip(f"{cassette_file} 没有定义 expected_spans")
+    
+    handler = ExtendedTelemetryHandler(tracer_provider=tracer_provider)
+    mock_stream = create_mock_stream_from_messages(test_case["messages"])
+    
+    async for _ in _process_agent_invocation_stream(
+        wrapped_stream=mock_stream,
+        handler=handler,
+        model="qwen-plus",
+        prompt=test_case["prompt"],
+    ):
+        pass
+    
+    spans = span_exporter.get_finished_spans()
+    
+    # 构建父 span 映射
+    parent_map = {}
+    for span in spans:
+        attrs = dict(span.attributes or {})
+        if GenAIAttributes.GEN_AI_OPERATION_NAME in attrs:
+            operation = attrs[GenAIAttributes.GEN_AI_OPERATION_NAME]
+            parent_map[operation] = span
+    
+    # 验证 spans 数量
+    assert len(spans) == len(expected_spans), (
+        f"生成的 spans 数量不匹配: "
+        f"期望 {len(expected_spans)} 个，实际 {len(spans)} 个"
+    )
+    
+    # 按 operation 类型分组 spans
+    spans_by_operation = {}
+    for span in spans:
+        attrs = dict(span.attributes or {})
+        if GenAIAttributes.GEN_AI_OPERATION_NAME in attrs:
+            operation = attrs[GenAIAttributes.GEN_AI_OPERATION_NAME]
+            if operation not in spans_by_operation:
+                spans_by_operation[operation] = []
+            spans_by_operation[operation].append(span)
+    
+    # 验证每个期望的 span
+    operation_index_map = {}
+    for i, expected_span_def in enumerate(expected_spans):
+        expected_operation = expected_span_def.get("operation")
+        
+        if expected_operation not in spans_by_operation:
+            pytest.fail(
+                f"期望的 span #{i+1} (operation={expected_operation}) 不存在于生成的 spans 中"
+            )
+        
+        if expected_operation not in operation_index_map:
+            operation_index_map[expected_operation] = 0
+        
+        operation_index = operation_index_map[expected_operation]
+        if operation_index >= len(spans_by_operation[expected_operation]):
+            pytest.fail(
+                f"期望的 span #{i+1} (operation={expected_operation}) "
+                f"超出了该类型的实际数量 ({len(spans_by_operation[expected_operation])})"
+            )
+        
+        actual_span = spans_by_operation[expected_operation][operation_index]
+        operation_index_map[expected_operation] += 1
+        
+        # 匹配 span
+        is_match, error_msg = match_span_to_expected(actual_span, expected_span_def, parent_map)
+        assert is_match, (
+            f"Span #{i+1} (operation={expected_operation}) 不匹配:\n"
+            f"  {error_msg}\n"
+            f"  Span 名称: {actual_span.name}"
+        )
+    
+    print(f"\n✅ {cassette_file}: 所有 {len(expected_spans)} 个 spans 验证通过")

From da5c0cd2eb9de9e774869b9b0d8232129ddce002 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E6=B5=81=E5=B1=BF?= <caishipeng.csp@alibaba-inc.com>
Date: Mon, 26 Jan 2026 14:20:02 +0800
Subject: [PATCH 18/23] [fix] fix test

Change-Id: Ic5c347a1241bda769fe8af50dc50397fd5b4e869
Co-developed-by: Cursor <noreply@cursor.com>
---
 .../instrumentation/claude_agent_sdk/patch.py |  103 +-
 .../tests/conftest_cassettes.py               |   39 -
 .../tests/extract_cassettes.py                |   72 -
 .../tests/test_integration.py                 |    5 +-
 .../tests/test_message_flow_cases.py          | 1939 -----------------
 .../tests/test_span_validation.py             |  418 ++--
 .../tests/test_task_tool.py                   |  247 ---
 .../tests/test_with_cassettes.py              |  223 +-
 8 files changed, 401 insertions(+), 2645 deletions(-)
 delete mode 100644 instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/conftest_cassettes.py
 delete mode 100644 instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/extract_cassettes.py
 delete mode 100644 instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/test_message_flow_cases.py
 delete mode 100644 instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/test_task_tool.py

diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/instrumentation/claude_agent_sdk/patch.py b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/instrumentation/claude_agent_sdk/patch.py
index 0f4e94dc0..341a97b23 100644
--- a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/instrumentation/claude_agent_sdk/patch.py
+++ b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/instrumentation/claude_agent_sdk/patch.py
@@ -19,12 +19,12 @@
 from typing import Any, Dict, List, Optional
 
 from opentelemetry import context as otel_context
-from opentelemetry.trace import set_span_in_context
 from opentelemetry.instrumentation.claude_agent_sdk.utils import (
     extract_usage_from_result_message,
     get_model_from_options_or_env,
     infer_provider_from_base_url,
 )
+from opentelemetry.trace import set_span_in_context
 from opentelemetry.util.genai.extended_handler import (
     ExtendedTelemetryHandler,
     get_extended_telemetry_handler,
@@ -53,7 +53,7 @@
 
 def _clear_client_managed_runs() -> None:
     """Clear all client-managed tool runs.
-    
+
     This should be called when a conversation ends to avoid memory leaks
     and to clean up any orphaned tool runs.
     """
@@ -115,7 +115,7 @@ def _create_tool_spans_from_message(
     exclude_tool_names: Optional[List[str]] = None,
 ) -> None:
     """Create tool execution spans from ToolUseBlocks in an AssistantMessage.
-    
+
     Tool spans are children of the active Task span (if any), otherwise agent span.
     When a Task tool is created, it's pushed onto active_task_stack.
     """
@@ -125,8 +125,12 @@ def _create_tool_spans_from_message(
     exclude_tool_names = exclude_tool_names or []
 
     # Determine parent span: use active Task span if exists, otherwise agent span
-    parent_span = active_task_stack[-1].span if active_task_stack else agent_invocation.span
-    
+    parent_span = (
+        active_task_stack[-1].span
+        if active_task_stack
+        else agent_invocation.span
+    )
+
     parent_context_token = None
     if parent_span:
         try:
@@ -140,7 +144,7 @@ def _create_tool_spans_from_message(
         for block in msg.content:
             if type(block).__name__ != "ToolUseBlock":
                 continue
-            
+
             tool_use_id = getattr(block, "id", None)
             tool_name = getattr(block, "name", "unknown_tool")
             tool_input = getattr(block, "input", {})
@@ -157,14 +161,18 @@ def _create_tool_spans_from_message(
                 )
                 handler.start_execute_tool(tool_invocation)
                 _client_managed_runs[tool_use_id] = tool_invocation
-                
+
                 # If this is a Task tool, push it onto the stack
                 if tool_name == "Task":
                     active_task_stack.append(tool_invocation)
-                    logger.debug(f"Task span created and pushed: {tool_use_id}, stack depth: {len(active_task_stack)}")
+                    logger.debug(
+                        f"Task span created and pushed: {tool_use_id}, stack depth: {len(active_task_stack)}"
+                    )
 
             except Exception as e:
-                logger.warning(f"Failed to create tool span for {tool_name}: {e}")
+                logger.warning(
+                    f"Failed to create tool span for {tool_name}: {e}"
+                )
     finally:
         if parent_context_token is not None:
             try:
@@ -209,14 +217,14 @@ def _process_assistant_message(
     parts = _extract_message_parts(msg)
     has_text_content = any(isinstance(p, Text) for p in parts)
     has_tool_calls = any(isinstance(p, ToolCall) for p in parts)
-    
+
     # Check if we're inside a Task
     is_inside_task = len(active_task_stack) > 0
 
     if has_text_content:
         if turn_tracker.current_llm_invocation:
             turn_tracker.close_llm_turn()
-        
+
         message_arrival_time = time.time()
 
         turn_tracker.start_llm_turn(
@@ -234,7 +242,7 @@ def _process_assistant_message(
                 role="assistant", parts=list(parts), finish_reason="stop"
             )
             agent_invocation.output_messages.append(output_msg)
-            
+
             # Only add to collected_messages if not inside a Task
             if not is_inside_task:
                 collected_messages.append(
@@ -244,7 +252,9 @@ def _process_assistant_message(
     elif has_tool_calls:
         if parts and turn_tracker.current_llm_invocation:
             if turn_tracker.current_llm_invocation.output_messages:
-                last_output_msg = turn_tracker.current_llm_invocation.output_messages[-1]
+                last_output_msg = (
+                    turn_tracker.current_llm_invocation.output_messages[-1]
+                )
                 last_output_msg.parts.extend(parts)
             else:
                 turn_tracker.add_assistant_output(parts)
@@ -253,8 +263,10 @@ def _process_assistant_message(
         if not is_inside_task:
             if parts and collected_messages:
                 last_msg = collected_messages[-1]
-                if (last_msg.get("role") == "assistant" and 
-                    turn_tracker.current_llm_invocation):
+                if (
+                    last_msg.get("role") == "assistant"
+                    and turn_tracker.current_llm_invocation
+                ):
                     last_parts = last_msg.get("parts", [])
                     last_parts.extend(parts)
                     last_msg["parts"] = last_parts
@@ -271,7 +283,9 @@ def _process_assistant_message(
     if has_tool_calls and turn_tracker.current_llm_invocation:
         turn_tracker.close_llm_turn()
 
-    _create_tool_spans_from_message(msg, handler, agent_invocation, active_task_stack)
+    _create_tool_spans_from_message(
+        msg, handler, agent_invocation, active_task_stack
+    )
 
 
 def _process_user_message(
@@ -284,14 +298,14 @@ def _process_user_message(
     """Process UserMessage: close tool spans, collect message content, mark next LLM start."""
     user_parts: List[MessagePart] = []
     tool_parts: List[MessagePart] = []
-    
+
     # Check if we're inside a Task
     is_inside_task = len(active_task_stack) > 0
-    
+
     if hasattr(msg, "content"):
         for block in msg.content:
             block_type = type(block).__name__
-            
+
             if block_type == "ToolResultBlock":
                 tool_use_id = getattr(block, "tool_use_id", None)
                 if tool_use_id and tool_use_id in _client_managed_runs:
@@ -316,14 +330,19 @@ def _process_user_message(
                         )
                     else:
                         handler.stop_execute_tool(tool_invocation)
-                    
+
                     # Check if this is a Task tool result - if so, pop from stack
                     # BEFORE we check is_inside_task for message filtering
-                    is_task_result = active_task_stack and active_task_stack[-1].tool_call_id == tool_use_id
+                    is_task_result = (
+                        active_task_stack
+                        and active_task_stack[-1].tool_call_id == tool_use_id
+                    )
                     if is_task_result:
                         active_task_stack.pop()
-                        logger.debug(f"Task span closed and popped: {tool_use_id}, stack depth: {len(active_task_stack)}")
-                
+                        logger.debug(
+                            f"Task span closed and popped: {tool_use_id}, stack depth: {len(active_task_stack)}"
+                        )
+
                 if tool_use_id:
                     tool_parts.append(
                         ToolCallResponse(
@@ -331,7 +350,7 @@ def _process_user_message(
                             response=tool_content if tool_content else "",
                         )
                     )
-            
+
             elif block_type == "TextBlock":
                 text_content = getattr(block, "text", "")
                 if text_content:
@@ -340,24 +359,30 @@ def _process_user_message(
     # Re-check if we're inside a Task AFTER popping Task results
     # This ensures Task tool results are NOT filtered out
     is_inside_task = len(active_task_stack) > 0
-    
+
     # Only add to collected_messages if not inside a Task
     if not is_inside_task:
         if user_parts:
             collected_messages.append({"role": "user", "parts": user_parts})
-        
+
         if tool_parts:
             if collected_messages:
                 last_msg = collected_messages[-1]
-                if (last_msg.get("role") == "tool" and 
-                    turn_tracker.current_llm_invocation):
+                if (
+                    last_msg.get("role") == "tool"
+                    and turn_tracker.current_llm_invocation
+                ):
                     last_parts = last_msg.get("parts", [])
                     last_parts.extend(tool_parts)
                     last_msg["parts"] = last_parts
                 else:
-                    collected_messages.append({"role": "tool", "parts": tool_parts})
+                    collected_messages.append(
+                        {"role": "tool", "parts": tool_parts}
+                    )
             else:
-                collected_messages.append({"role": "tool", "parts": tool_parts})    
+                collected_messages.append(
+                    {"role": "tool", "parts": tool_parts}
+                )
     # Always mark next LLM start when UserMessage arrives
     turn_tracker.mark_next_llm_start()
 
@@ -372,7 +397,7 @@ def _process_result_message(
         agent_invocation.conversation_id = msg.session_id
 
     _update_token_usage(agent_invocation, turn_tracker, msg)
-    
+
     if turn_tracker.current_llm_invocation:
         turn_tracker.close_llm_turn()
 
@@ -412,7 +437,7 @@ async def _process_agent_invocation_stream(
     )
 
     collected_messages: List[Dict[str, Any]] = []
-    
+
     # Stack to track active Task tool invocations
     # When a Task tool is created, it's pushed here
     # When its ToolResultBlock is received, it's popped
@@ -460,12 +485,14 @@ async def _process_agent_invocation_stream(
         raise
     finally:
         turn_tracker.close()
-        
+
         # Clean up any remaining Task spans in stack (shouldn't happen in normal flow)
         while active_task_stack:
             task_invocation = active_task_stack.pop()
-            logger.warning(f"Unclosed Task span at end of invocation: {task_invocation.tool_call_id}")
-        
+            logger.warning(
+                f"Unclosed Task span at end of invocation: {task_invocation.tool_call_id}"
+            )
+
         # Detach empty context token to restore the original context.
         # Note: stop_invoke_agent/fail_invoke_agent already detached invocation.context_token,
         # which restored to empty context. Now we detach empty_context_token to restore further.
@@ -523,13 +550,11 @@ def start_llm_turn(
 
         for hist_msg in collected_messages:
             role = hist_msg.get("role", "user")
-            
+
             if "parts" in hist_msg:
                 parts = hist_msg["parts"]
                 if parts:
-                    input_messages.append(
-                        InputMessage(role=role, parts=parts)
-                    )
+                    input_messages.append(InputMessage(role=role, parts=parts))
             elif "content" in hist_msg:
                 content = hist_msg["content"]
                 if isinstance(content, str) and content:
diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/conftest_cassettes.py b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/conftest_cassettes.py
deleted file mode 100644
index 0a92dff58..000000000
--- a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/conftest_cassettes.py
+++ /dev/null
@@ -1,39 +0,0 @@
-"""Pytest configuration for Claude Agent SDK instrumentation tests."""
-
-import yaml
-from pathlib import Path
-from typing import Any, Dict, List
-import pytest
-
-
-def load_cassette(filename: str) -> Dict[str, Any]:
-    """从 cassettes 目录加载测试用例。
-    
-    Args:
-        filename: cassette 文件名
-        
-    Returns:
-        测试用例数据字典
-    """
-    cassette_path = Path(__file__).parent / "cassettes" / filename
-    
-    with open(cassette_path, 'r', encoding='utf-8') as f:
-        return yaml.safe_load(f)
-
-
-def get_all_cassettes() -> List[str]:
-    """获取所有 cassette 文件名。
-    
-    Returns:
-        cassette 文件名列表
-    """
-    cassettes_dir = Path(__file__).parent / "cassettes"
-    return sorted([f.name for f in cassettes_dir.glob("*.yaml")])
-
-
-# Pytest fixture for cassettes
-@pytest.fixture
-def cassette(request):
-    """加载指定的 cassette 文件。"""
-    filename = request.param
-    return load_cassette(filename)
diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/extract_cassettes.py b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/extract_cassettes.py
deleted file mode 100644
index 62d757a1f..000000000
--- a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/extract_cassettes.py
+++ /dev/null
@@ -1,72 +0,0 @@
-"""Extract test cases from test_message_flow_cases.py and save as YAML cassettes."""
-
-import json
-import yaml
-from pathlib import Path
-
-# 导入测试用例
-import sys
-sys.path.insert(0, str(Path(__file__).parent))
-from test_message_flow_cases import (
-    TEST_CASE_1_FOO_SH_COMMAND,
-    TEST_CASE_2_ECHO_COMMAND,
-    TEST_CASE_3_PRETOOLUSE,
-)
-
-
-def save_test_case_as_cassette(test_case, filename):
-    """保存测试用例为 YAML cassette 文件。"""
-    cassette_data = {
-        "description": test_case["description"],
-        "prompt": test_case["prompt"],
-        "messages": test_case["messages"],
-    }
-    
-    # 如果有 expected_spans，也保存
-    if "expected_spans" in test_case:
-        cassette_data["expected_spans"] = test_case["expected_spans"]
-    
-    cassettes_dir = Path(__file__).parent / "cassettes"
-    cassettes_dir.mkdir(exist_ok=True)
-    
-    output_file = cassettes_dir / filename
-    
-    with open(output_file, 'w', encoding='utf-8') as f:
-        yaml.dump(
-            cassette_data,
-            f,
-            allow_unicode=True,
-            default_flow_style=False,
-            sort_keys=False,
-            width=120,
-        )
-    
-    print(f"✅ Saved: {output_file}")
-    return output_file
-
-
-def main():
-    """Extract and save all test cases."""
-    print("Extracting test cases to cassettes...\n")
-    
-    # 保存三个测试用例
-    save_test_case_as_cassette(
-        TEST_CASE_1_FOO_SH_COMMAND,
-        "test_foo_sh_command.yaml"
-    )
-    
-    save_test_case_as_cassette(
-        TEST_CASE_2_ECHO_COMMAND,
-        "test_echo_command.yaml"
-    )
-    
-    save_test_case_as_cassette(
-        TEST_CASE_3_PRETOOLUSE,
-        "test_pretooluse_hook.yaml"
-    )
-    
-    print("\n✅ All test cases extracted successfully!")
-
-
-if __name__ == "__main__":
-    main()
diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/test_integration.py b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/test_integration.py
index de047c8f5..dbaa685c8 100644
--- a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/test_integration.py
+++ b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/test_integration.py
@@ -4,7 +4,6 @@
 realistic scenarios without requiring API keys.
 """
 
-import asyncio
 from unittest.mock import AsyncMock, Mock, patch
 
 import pytest
@@ -57,12 +56,12 @@ async def test_client_with_mocked_response(instrument, span_exporter):
 @pytest.mark.asyncio
 async def test_instrumentor_doesnt_crash_with_mocks(instrument, span_exporter):
     """Test that instrumentor doesn't crash even with mock objects."""
-    # This test验证instrumentation可以安全处理mock对象
+    # This test verifies instrumentation can safely handle mock objects
     mock_msg = Mock()
     mock_msg.content = []
     mock_msg.usage = None
 
-    # 使用instrumented环境处理mock对象不应该崩溃
+    # Using instrumented environment should not crash when handling mock objects
     try:
         # Simulate what instrumentation might do
         if hasattr(mock_msg, "usage") and mock_msg.usage:
diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/test_message_flow_cases.py b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/test_message_flow_cases.py
deleted file mode 100644
index 85adb8162..000000000
--- a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/test_message_flow_cases.py
+++ /dev/null
@@ -1,1939 +0,0 @@
-"""测试用例：基于真实的 message 流来测试 _process_agent_invocation_stream 函数。
-
-这些测试用例基于 claude-agent-sdk-python 的真实运行示例：
-1. foo.sh 命令示例 - 展示了工具调用和错误处理（多轮对话，包含错误场景）
-2. echo 命令示例 - 展示了简单的成功路径（单轮对话，成功场景）
-3. pretooluse 示例 - 展示了 PreToolUse hook 的使用（工具调用被 hook 阻止）
-
-测试数据来源：/Users/sipercai/.cursor/projects/Users-sipercai-project-pyins/terminals/6.txt:229-246
-"""
-
-import json
-from typing import Any, AsyncIterator, Dict, List
-from unittest.mock import MagicMock
-
-import pytest
-
-
-# ============================================================================
-# 辅助函数：从测试数据创建模拟的 message 对象
-# ============================================================================
-
-
-def create_mock_message_from_data(message_data: Dict[str, Any]) -> Any:
-    """从测试数据字典创建模拟的 message 对象。
-    
-    Args:
-        message_data: 包含消息数据的字典
-        
-    Returns:
-        模拟的 message 对象，具有与真实对象相同的属性
-    """
-    mock_msg = MagicMock()
-    msg_type = message_data["type"]
-    
-    # 设置类型名称，让 isinstance 检查能够通过字符串比较
-    mock_msg.__class__.__name__ = msg_type
-    
-    if msg_type == "SystemMessage":
-        mock_msg.subtype = message_data["subtype"]
-        mock_msg.data = message_data["data"]
-        
-    elif msg_type == "AssistantMessage":
-        mock_msg.model = message_data["model"]
-        mock_msg.content = []
-        
-        for block_data in message_data["content"]:
-            mock_block = MagicMock()
-            block_type = block_data["type"]
-            mock_block.__class__.__name__ = block_type
-            
-            if block_type == "TextBlock":
-                mock_block.text = block_data["text"]
-            elif block_type == "ToolUseBlock":
-                mock_block.id = block_data["id"]
-                mock_block.name = block_data["name"]
-                mock_block.input = block_data["input"]
-            
-            mock_msg.content.append(mock_block)
-            
-        mock_msg.parent_tool_use_id = message_data.get("parent_tool_use_id")
-        mock_msg.error = message_data.get("error")
-        
-    elif msg_type == "UserMessage":
-        mock_msg.content = []
-        
-        for block_data in message_data["content"]:
-            mock_block = MagicMock()
-            mock_block.__class__.__name__ = block_data["type"]
-            
-            if block_data["type"] == "ToolResultBlock":
-                mock_block.tool_use_id = block_data["tool_use_id"]
-                mock_block.content = block_data["content"]
-                mock_block.is_error = block_data["is_error"]
-                
-            mock_msg.content.append(mock_block)
-            
-        mock_msg.uuid = message_data.get("uuid")
-        mock_msg.parent_tool_use_id = message_data.get("parent_tool_use_id")
-        
-    elif msg_type == "ResultMessage":
-        mock_msg.subtype = message_data["subtype"]
-        mock_msg.duration_ms = message_data["duration_ms"]
-        mock_msg.duration_api_ms = message_data.get("duration_api_ms")
-        mock_msg.is_error = message_data["is_error"]
-        mock_msg.num_turns = message_data["num_turns"]
-        mock_msg.session_id = message_data.get("session_id")
-        mock_msg.total_cost_usd = message_data["total_cost_usd"]
-        mock_msg.usage = message_data["usage"]
-        mock_msg.result = message_data["result"]
-        mock_msg.structured_output = message_data.get("structured_output")
-    
-    return mock_msg
-
-
-async def create_mock_stream_from_messages(
-    messages: List[Dict[str, Any]]
-) -> AsyncIterator[Any]:
-    """创建模拟的 message 异步流。
-    
-    Args:
-        messages: 消息数据列表
-        
-    Yields:
-        模拟的 message 对象
-    """
-    for message_data in messages:
-        yield create_mock_message_from_data(message_data)
-
-
-# ============================================================================
-# 真实测试数据
-# ============================================================================
-
-TEST_CASE_1_FOO_SH_COMMAND = {
-    "description": "User query: ./foo.sh --help (被阻止的命令)",
-    "prompt": "./foo.sh --help",
-    "messages": [
-        {
-            "type": "SystemMessage",
-            "subtype": "init",
-            "data": {
-                "type": "system",
-                "subtype": "init",
-                "cwd": "/Users/sipercai/project/pyins/extend/claude-agent-sdk-python/examples",
-                "session_id": "cf630075-3251-4f84-a57d-4274afed0c46",
-                "tools": [
-                    "Task",
-                    "TaskOutput",
-                    "Bash",
-                    "Glob",
-                    "Grep",
-                    "ExitPlanMode",
-                    "Read",
-                    "Edit",
-                    "Write",
-                    "NotebookEdit",
-                    "WebFetch",
-                    "TodoWrite",
-                    "WebSearch",
-                    "KillShell",
-                    "AskUserQuestion",
-                    "Skill",
-                    "EnterPlanMode",
-                ],
-                "mcp_servers": [],
-                "model": "qwen-plus",
-                "permissionMode": "default",
-                "slash_commands": [
-                    "compact",
-                    "context",
-                    "cost",
-                    "init",
-                    "pr-comments",
-                    "release-notes",
-                    "review",
-                    "security-review",
-                ],
-                "apiKeySource": "ANTHROPIC_API_KEY",
-                "claude_code_version": "2.1.1",
-                "output_style": "default",
-                "agents": [
-                    "Bash",
-                    "general-purpose",
-                    "statusline-setup",
-                    "Explore",
-                    "Plan",
-                ],
-                "skills": [],
-                "plugins": [],
-                "uuid": "cf949b1a-3a2e-4135-bcd8-5ae6a9c6ae1d",
-            },
-        },
-        {
-            "type": "AssistantMessage",
-            "model": "qwen-plus",
-            "content": [
-                {
-                    "type": "TextBlock",
-                    "text": "I'll run the bash command `./foo.sh --help` for you.\n\n",
-                }
-            ],
-            "parent_tool_use_id": None,
-            "error": None,
-        },
-        {
-            "type": "AssistantMessage",
-            "model": "qwen-plus",
-            "content": [
-                {
-                    "type": "ToolUseBlock",
-                    "id": "call_33776c3031c148f6a02f71",
-                    "name": "Bash",
-                    "input": {
-                        "command": "./foo.sh --help",
-                        "description": "Run foo.sh script with help flag",
-                    },
-                }
-            ],
-            "parent_tool_use_id": None,
-            "error": None,
-        },
-        {
-            "type": "UserMessage",
-            "content": [
-                {
-                    "type": "ToolResultBlock",
-                    "tool_use_id": "call_33776c3031c148f6a02f71",
-                    "content": "Command contains invalid pattern: foo.sh",
-                    "is_error": True,
-                }
-            ],
-            "uuid": "8173b335-1fa8-45d0-a65d-5d901d69a8c8",
-            "parent_tool_use_id": None,
-        },
-        {
-            "type": "AssistantMessage",
-            "model": "qwen-plus",
-            "content": [
-                {
-                    "type": "TextBlock",
-                    "text": "It looks like the `foo.sh` script doesn't exist in the current directory or isn't accessible. Let me check what files are available in the current directory first to see what scripts we have:\n\n",
-                }
-            ],
-            "parent_tool_use_id": None,
-            "error": None,
-        },
-        {
-            "type": "AssistantMessage",
-            "model": "qwen-plus",
-            "content": [
-                {
-                    "type": "ToolUseBlock",
-                    "id": "call_2066d7694fd9488096cd3e",
-                    "name": "Bash",
-                    "input": {
-                        "command": "ls -la",
-                        "description": "List all files in current directory",
-                    },
-                }
-            ],
-            "parent_tool_use_id": None,
-            "error": None,
-        },
-        {
-            "type": "UserMessage",
-            "content": [
-                {
-                    "type": "ToolResultBlock",
-                    "tool_use_id": "call_2066d7694fd9488096cd3e",
-                    "content": "total 432\ndrwxr-xr-x@ 41 sipercai  staff   1312 Jan 23 12:33 .\ndrwxr-xr-x@ 23 sipercai  staff    736 Jan 12 18:02 ..\ndrwxr-xr-x@ 14 sipercai  staff    448 Jan  9 16:12 __pycache__\n-rw-r--r--@  1 sipercai  staff   4708 Dec 25 17:18 agents.py\n-rw-r--r--@  1 sipercai  staff   2189 Jan 23 12:34 code_reviewer_example.py\n...",
-                    "is_error": False,
-                }
-            ],
-            "uuid": "9d0f6d9c-2715-47b1-9a5c-86237cd935ab",
-            "parent_tool_use_id": None,
-        },
-        {
-            "type": "AssistantMessage",
-            "model": "qwen-plus",
-            "content": [
-                {
-                    "type": "TextBlock",
-                    "text": "I can see that there's no `foo.sh` script in the current directory. Looking at the files listed, I can see several Python scripts (`.py` files) and one text file (`hello.txt`). \n\nThe closest executable file I can see is `pretooluse_example.py` which has execute permissions (`-rwxr-xr-x`), but it's a Python script, not a shell script.\n\nWould you like me to:\n- Check if there are any other shell scripts (`.sh` files) in subdirectories?\n- Run the `--help` command on one of the Python scripts instead (like `python pretooluse_example.py --help`)?\n- Or did you mean to run a different command or script?\n\nLet me know what you'd like to do next!",
-                }
-            ],
-            "parent_tool_use_id": None,
-            "error": None,
-        },
-        {
-            "type": "ResultMessage",
-            "subtype": "success",
-            "duration_ms": 11929,
-            "duration_api_ms": 11297,
-            "is_error": False,
-            "num_turns": 3,
-            "session_id": "cf630075-3251-4f84-a57d-4274afed0c46",
-            "total_cost_usd": 0.129201,
-            "usage": {
-                "input_tokens": 41647,
-                "cache_creation_input_tokens": 0,
-                "cache_read_input_tokens": 0,
-                "output_tokens": 284,
-                "server_tool_use": {"web_search_requests": 0, "web_fetch_requests": 0},
-                "service_tier": "standard",
-                "cache_creation": {
-                    "ephemeral_1h_input_tokens": 0,
-                    "ephemeral_5m_input_tokens": 0,
-                },
-            },
-            "result": "I can see that there's no `foo.sh` script in the current directory...",
-            "structured_output": None,
-        },
-    ],
-    "expected_spans": [
-        {
-            "name_pattern": "invoke_agent",
-            "operation": "invoke_agent",
-            "parent": None,  # 根 span
-            "attributes": {
-                "gen_ai.operation.name": "invoke_agent",
-                "gen_ai.agent.name": "claude-agent",
-                "gen_ai.conversation.id": "cf630075-3251-4f84-a57d-4274afed0c46",
-                "gen_ai.usage.input_tokens": 41647,
-                "gen_ai.usage.output_tokens": 284,
-            },
-            "attributes_contains": {
-                "gen_ai.input.messages": "./foo.sh --help",  # 包含 prompt
-            },
-        },
-        {
-            "name_pattern": "chat",
-            "operation": "chat",
-            "parent": "invoke_agent",  # Agent span 的子 span
-            "attributes": {
-                "gen_ai.operation.name": "chat",
-                "gen_ai.request.model": "qwen-plus",
-            },
-            "attributes_contains": {
-                "gen_ai.input.messages": "./foo.sh --help",  # 包含 prompt
-                "gen_ai.output.messages": ["I'll run the bash command", "tool_call"],
-                "gen_ai.response.finish_reasons": ["stop"],
-            },
-        },
-        {
-            "name_pattern": "execute_tool",
-            "operation": "execute_tool",
-            "parent": "invoke_agent",  # Agent span 的子 span（不是 LLM span）
-            "attributes": {
-                "gen_ai.operation.name": "execute_tool",
-                "gen_ai.tool.name": "Bash",
-                "gen_ai.tool.call.id": "call_33776c3031c148f6a02f71",
-            },
-            "attributes_contains": {
-                "gen_ai.tool.call.arguments": "./foo.sh --help",  # 包含命令
-                "gen_ai.tool.call.result": "Command contains invalid pattern",  # 工具结果
-            },
-            "is_error": True,  # 第一个工具调用失败
-        },
-        {
-            "name_pattern": "chat",
-            "operation": "chat",
-            "parent": "invoke_agent",  # Agent span 的子 span
-            "attributes": {
-                "gen_ai.operation.name": "chat",
-                "gen_ai.request.model": "qwen-plus",
-            },
-            "attributes_contains": {
-                "gen_ai.input.messages": [
-                    "./foo.sh --help",  # 用户初始请求
-                    "tool_call",  # 第一个 LLM 的 tool_call
-                    "tool_call_response",  # 第一个工具的结果
-                ],
-                "gen_ai.output.messages": [
-                    "doesn't exist",  # 输出文本内容
-                    "tool_call",  # 第二个 tool_call
-                ],
-                "gen_ai.response.finish_reasons": ["stop"],
-            },
-        },
-        {
-            "name_pattern": "execute_tool",
-            "operation": "execute_tool",
-            "parent": "invoke_agent",  # Agent span 的子 span
-            "attributes": {
-                "gen_ai.operation.name": "execute_tool",
-                "gen_ai.tool.name": "Bash",
-                "gen_ai.tool.call.id": "call_2066d7694fd9488096cd3e",
-            },
-            "attributes_contains": {
-                "gen_ai.tool.call.arguments": "ls -la",  # 包含命令
-                "gen_ai.tool.call.result": "total",  # 工具结果（包含 ls 输出）
-            },
-            "is_error": False,  # 第二个工具调用成功
-        },
-        {
-            "name_pattern": "chat",
-            "operation": "chat",
-            "parent": "invoke_agent",  # Agent span 的子 span
-            "attributes": {
-                "gen_ai.operation.name": "chat",
-                "gen_ai.request.model": "qwen-plus",
-            },
-            "attributes_contains": {
-                "gen_ai.input.messages": [
-                    "./foo.sh --help",  # 用户初始请求
-                    "tool_call",  # 历史 tool_call
-                    "tool_call_response",  # 历史工具结果
-                    "ls -la",  # 第二个工具的参数
-                ],
-                "gen_ai.output.messages": [
-                    "no `foo.sh` script",  # 输出文本内容
-                ],
-                "gen_ai.response.finish_reasons": ["stop"],
-            },
-            "message_structure": {
-                "gen_ai.input.messages": {
-                    "roles": ["user", "assistant", "tool", "assistant", "tool"],
-                    "parts_types": [
-                        ["text"],  # user message
-                        ["text", "tool_call"],  # assistant message 1
-                        ["tool_call_response"],  # tool response 1
-                        ["text", "tool_call"],  # assistant message 2
-                        ["tool_call_response"],  # tool response 2
-                    ],
-                }
-            },
-        },
-    ],
-}
-
-TEST_CASE_2_ECHO_COMMAND = {
-    "description": "User query: echo 'Hello from hooks example!'",
-    "prompt": "echo 'Hello from hooks example!'",
-    "messages": [
-        {
-            "type": "SystemMessage",
-            "subtype": "init",
-            "data": {
-                "type": "system",
-                "subtype": "init",
-                "cwd": "/Users/sipercai/project/pyins/extend/claude-agent-sdk-python/examples",
-                "session_id": "cf630075-3251-4f84-a57d-4274afed0c46",
-                "tools": [
-                    "Task",
-                    "TaskOutput",
-                    "Bash",
-                    "Glob",
-                    "Grep",
-                    "ExitPlanMode",
-                    "Read",
-                    "Edit",
-                    "Write",
-                    "NotebookEdit",
-                    "WebFetch",
-                    "TodoWrite",
-                    "WebSearch",
-                    "KillShell",
-                    "AskUserQuestion",
-                    "Skill",
-                    "EnterPlanMode",
-                ],
-                "mcp_servers": [],
-                "model": "qwen-plus",
-                "permissionMode": "default",
-                "slash_commands": [
-                    "compact",
-                    "context",
-                    "cost",
-                    "init",
-                    "pr-comments",
-                    "release-notes",
-                    "review",
-                    "security-review",
-                ],
-                "apiKeySource": "ANTHROPIC_API_KEY",
-                "claude_code_version": "2.1.1",
-                "output_style": "default",
-                "agents": [
-                    "Bash",
-                    "general-purpose",
-                    "statusline-setup",
-                    "Explore",
-                    "Plan",
-                ],
-                "skills": [],
-                "plugins": [],
-                "uuid": "8dd3b79c-8b8c-4649-8e4d-7eaa01374f1f",
-            },
-        },
-        {
-            "type": "AssistantMessage",
-            "model": "qwen-plus",
-            "content": [
-                {
-                    "type": "TextBlock",
-                    "text": "I'll run the bash command `echo 'Hello from hooks example!'` for you.\n\n\n",
-                }
-            ],
-            "parent_tool_use_id": None,
-            "error": None,
-        },
-        {
-            "type": "AssistantMessage",
-            "model": "qwen-plus",
-            "content": [
-                {
-                    "type": "ToolUseBlock",
-                    "id": "call_601533b186c2423dad58e8",
-                    "name": "Bash",
-                    "input": {
-                        "command": "echo 'Hello from hooks example!'",
-                        "description": "Print hello message from hooks example",
-                    },
-                }
-            ],
-            "parent_tool_use_id": None,
-            "error": None,
-        },
-        {
-            "type": "UserMessage",
-            "content": [
-                {
-                    "type": "ToolResultBlock",
-                    "tool_use_id": "call_601533b186c2423dad58e8",
-                    "content": "Hello from hooks example\\!",
-                    "is_error": False,
-                }
-            ],
-            "uuid": "a043c8a7-9b34-46db-adbe-d376748df781",
-            "parent_tool_use_id": None,
-        },
-        {
-            "type": "AssistantMessage",
-            "model": "qwen-plus",
-            "content": [
-                {
-                    "type": "TextBlock",
-                    "text": 'The command executed successfully and printed: "Hello from hooks example!"\n\nNote: The exclamation mark appears to have been escaped in the output as `\\!`, which is normal behavior in some shell contexts, but the message was displayed correctly.\n\nIs there anything else you\'d like me to help you with?',
-                }
-            ],
-            "parent_tool_use_id": None,
-            "error": None,
-        },
-        {
-            "type": "ResultMessage",
-            "subtype": "success",
-            "duration_ms": 5280,
-            "duration_api_ms": 16331,
-            "is_error": False,
-            "num_turns": 2,
-            "session_id": "cf630075-3251-4f84-a57d-4274afed0c46",
-            "total_cost_usd": 0.221802,
-            "usage": {
-                "input_tokens": 30297,
-                "cache_creation_input_tokens": 0,
-                "cache_read_input_tokens": 0,
-                "output_tokens": 114,
-                "server_tool_use": {"web_search_requests": 0, "web_fetch_requests": 0},
-                "service_tier": "standard",
-                "cache_creation": {
-                    "ephemeral_1h_input_tokens": 0,
-                    "ephemeral_5m_input_tokens": 0,
-                },
-            },
-            "result": 'The command executed successfully and printed: "Hello from hooks example!"...',
-            "structured_output": None,
-        },
-    ],
-    "expected_spans": [
-        {
-            "name_pattern": "invoke_agent",
-            "operation": "invoke_agent",
-            "parent": None,  # 根 span
-            "attributes": {
-                "gen_ai.operation.name": "invoke_agent",
-                "gen_ai.agent.name": "claude-agent",
-                "gen_ai.conversation.id": "cf630075-3251-4f84-a57d-4274afed0c46",
-                "gen_ai.usage.input_tokens": 30297,
-                "gen_ai.usage.output_tokens": 114,
-            },
-            "attributes_contains": {
-                "gen_ai.input.messages": "echo 'Hello from hooks example!'",  # 包含 prompt
-            },
-        },
-        {
-            "name_pattern": "chat",
-            "operation": "chat",
-            "parent": "invoke_agent",  # Agent span 的子 span
-            "attributes": {
-                "gen_ai.operation.name": "chat",
-                "gen_ai.request.model": "qwen-plus",
-            },
-            "attributes_contains": {
-                "gen_ai.input.messages": "echo 'Hello from hooks example!'",  # 包含 prompt
-            },
-        },
-        {
-            "name_pattern": "execute_tool",
-            "operation": "execute_tool",
-            "parent": "invoke_agent",  # Agent span 的子 span（不是 LLM span）
-            "attributes": {
-                "gen_ai.operation.name": "execute_tool",
-                "gen_ai.tool.name": "Bash",
-                "gen_ai.tool.call.id": "call_601533b186c2423dad58e8",
-            },
-            "attributes_contains": {
-                "gen_ai.tool.call.arguments": "echo 'Hello from hooks example!'",  # 包含命令
-            },
-            "is_error": False,  # 工具调用成功
-        },
-        {
-            "name_pattern": "chat",
-            "operation": "chat",
-            "parent": "invoke_agent",  # Agent span 的子 span
-            "attributes": {
-                "gen_ai.operation.name": "chat",
-                "gen_ai.request.model": "qwen-plus",
-            },
-            "attributes_contains": {
-                "gen_ai.input.messages": "echo 'Hello from hooks example!'",  # 包含 prompt
-            },
-        },
-    ],
-}
-
-TEST_CASE_3_PRETOOLUSE = {
-    "description": "User query: Run the bash command: ./foo.sh --help (PreToolUse hook 阻止的命令)",
-    "prompt": "Run the bash command: ./foo.sh --help",
-    "messages": [
-        {
-            "type": "SystemMessage",
-            "subtype": "init",
-            "data": {
-                "type": "system",
-                "subtype": "init",
-                "cwd": "/Users/sipercai/project/pyins/extend/claude-agent-sdk-python/examples",
-                "session_id": "cf630075-3251-4f84-a57d-4274afed0c46",
-                "tools": [
-                    "Task",
-                    "TaskOutput",
-                    "Bash",
-                    "Glob",
-                    "Grep",
-                    "ExitPlanMode",
-                    "Read",
-                    "Edit",
-                    "Write",
-                    "NotebookEdit",
-                    "WebFetch",
-                    "TodoWrite",
-                    "WebSearch",
-                    "KillShell",
-                    "AskUserQuestion",
-                    "Skill",
-                    "EnterPlanMode",
-                ],
-                "mcp_servers": [],
-                "model": "qwen-plus",
-                "permissionMode": "default",
-                "slash_commands": [
-                    "compact",
-                    "context",
-                    "cost",
-                    "init",
-                    "pr-comments",
-                    "release-notes",
-                    "review",
-                    "security-review",
-                ],
-                "apiKeySource": "ANTHROPIC_API_KEY",
-                "claude_code_version": "2.1.1",
-                "output_style": "default",
-                "agents": [
-                    "Bash",
-                    "general-purpose",
-                    "statusline-setup",
-                    "Explore",
-                    "Plan",
-                ],
-                "skills": [],
-                "plugins": [],
-                "uuid": "cf949b1a-3a2e-4135-bcd8-5ae6a9c6ae1d",
-            },
-        },
-        {
-            "type": "AssistantMessage",
-            "model": "qwen-plus",
-            "content": [
-                {
-                    "type": "TextBlock",
-                    "text": "I'll run the bash command `./foo.sh --help` for you.\n\n",
-                }
-            ],
-            "parent_tool_use_id": None,
-            "error": None,
-        },
-        {
-            "type": "AssistantMessage",
-            "model": "qwen-plus",
-            "content": [
-                {
-                    "type": "ToolUseBlock",
-                    "id": "call_33776c3031c148f6a02f71",
-                    "name": "Bash",
-                    "input": {
-                        "command": "./foo.sh --help",
-                        "description": "Run foo.sh script with help flag",
-                    },
-                }
-            ],
-            "parent_tool_use_id": None,
-            "error": None,
-        },
-        {
-            "type": "UserMessage",
-            "content": [
-                {
-                    "type": "ToolResultBlock",
-                    "tool_use_id": "call_33776c3031c148f6a02f71",
-                    "content": "Command contains invalid pattern: foo.sh",
-                    "is_error": True,
-                }
-            ],
-            "uuid": "8173b335-1fa8-45d0-a65d-5d901d69a8c8",
-            "parent_tool_use_id": None,
-        },
-        {
-            "type": "AssistantMessage",
-            "model": "qwen-plus",
-            "content": [
-                {
-                    "type": "TextBlock",
-                    "text": "It looks like the `foo.sh` script doesn't exist in the current directory or isn't accessible. Let me check what files are available in the current directory first to see what scripts we have:\n\n",
-                }
-            ],
-            "parent_tool_use_id": None,
-            "error": None,
-        },
-        {
-            "type": "AssistantMessage",
-            "model": "qwen-plus",
-            "content": [
-                {
-                    "type": "ToolUseBlock",
-                    "id": "call_2066d7694fd9488096cd3e",
-                    "name": "Bash",
-                    "input": {
-                        "command": "ls -la",
-                        "description": "List all files in current directory",
-                    },
-                }
-            ],
-            "parent_tool_use_id": None,
-            "error": None,
-        },
-        {
-            "type": "UserMessage",
-            "content": [
-                {
-                    "type": "ToolResultBlock",
-                    "tool_use_id": "call_2066d7694fd9488096cd3e",
-                    "content": "total 432\ndrwxr-xr-x@ 41 sipercai  staff   1312 Jan 23 12:33 .\ndrwxr-xr-x@ 23 sipercai  staff    736 Jan 12 18:02 ..\ndrwxr-xr-x@ 14 sipercai  staff    448 Jan  9 16:12 __pycache__\n-rw-r--r--@  1 sipercai  staff   4708 Dec 25 17:18 agents.py\n-rw-r--r--@  1 sipercai  staff   2189 Jan 23 12:34 code_reviewer_example.py\n...",
-                    "is_error": False,
-                }
-            ],
-            "uuid": "9d0f6d9c-2715-47b1-9a5c-86237cd935ab",
-            "parent_tool_use_id": None,
-        },
-        {
-            "type": "AssistantMessage",
-            "model": "qwen-plus",
-            "content": [
-                {
-                    "type": "TextBlock",
-                    "text": "I can see that there's no `foo.sh` script in the current directory. Looking at the files listed, I can see several Python scripts (`.py` files) and one text file (`hello.txt`). \n\nThe closest executable file I can see is `pretooluse_example.py` which has execute permissions (`-rwxr-xr-x`), but it's a Python script, not a shell script.\n\nWould you like me to:\n- Check if there are any other shell scripts (`.sh` files) in subdirectories?\n- Run the `--help` command on one of the Python scripts instead (like `python pretooluse_example.py --help`)?\n- Or did you mean to run a different command or script?\n\nLet me know what you'd like to do next!",
-                }
-            ],
-            "parent_tool_use_id": None,
-            "error": None,
-        },
-        {
-            "type": "ResultMessage",
-            "subtype": "success",
-            "duration_ms": 11929,
-            "duration_api_ms": 11297,
-            "is_error": False,
-            "num_turns": 3,
-            "session_id": "cf630075-3251-4f84-a57d-4274afed0c46",
-            "total_cost_usd": 0.129201,
-            "usage": {
-                "input_tokens": 41647,
-                "cache_creation_input_tokens": 0,
-                "cache_read_input_tokens": 0,
-                "output_tokens": 284,
-                "server_tool_use": {"web_search_requests": 0, "web_fetch_requests": 0},
-                "service_tier": "standard",
-                "cache_creation": {
-                    "ephemeral_1h_input_tokens": 0,
-                    "ephemeral_5m_input_tokens": 0,
-                },
-            },
-            "result": "I can see that there's no `foo.sh` script in the current directory...",
-            "structured_output": None,
-        },
-    ],
-    "expected_spans": [
-        {
-            "name_pattern": "invoke_agent",
-            "operation": "invoke_agent",
-            "parent": None,  # 根 span
-            "attributes": {
-                "gen_ai.operation.name": "invoke_agent",
-                "gen_ai.agent.name": "claude-agent",
-                "gen_ai.conversation.id": "cf630075-3251-4f84-a57d-4274afed0c46",
-                "gen_ai.usage.input_tokens": 41647,
-                "gen_ai.usage.output_tokens": 284,
-            },
-            "attributes_contains": {
-                "gen_ai.input.messages": "Run the bash command: ./foo.sh --help",  # 包含 prompt
-            },
-        },
-        {
-            "name_pattern": "chat",
-            "operation": "chat",
-            "parent": "invoke_agent",  # Agent span 的子 span
-            "attributes": {
-                "gen_ai.operation.name": "chat",
-                "gen_ai.request.model": "qwen-plus",
-            },
-            "attributes_contains": {
-                "gen_ai.input.messages": "Run the bash command: ./foo.sh --help",  # 包含 prompt
-                "gen_ai.output.messages": ["I'll run the bash command", "tool_call"],
-                "gen_ai.response.finish_reasons": ["stop"],
-            },
-        },
-        {
-            "name_pattern": "execute_tool",
-            "operation": "execute_tool",
-            "parent": "invoke_agent",  # Agent span 的子 span（不是 LLM span）
-            "attributes": {
-                "gen_ai.operation.name": "execute_tool",
-                "gen_ai.tool.name": "Bash",
-                "gen_ai.tool.call.id": "call_33776c3031c148f6a02f71",
-                "gen_ai.tool.call.result": "Command contains invalid pattern: foo.sh",
-            },
-            "attributes_contains": {
-                "gen_ai.tool.call.arguments": "./foo.sh --help",  # 包含命令
-            },
-            "is_error": True,  # 第一个工具调用被 PreToolUse hook 阻止
-        },
-        {
-            "name_pattern": "chat",
-            "operation": "chat",
-            "parent": "invoke_agent",  # Agent span 的子 span
-            "attributes": {
-                "gen_ai.operation.name": "chat",
-                "gen_ai.request.model": "qwen-plus",
-            },
-            "attributes_contains": {
-                "gen_ai.input.messages": [
-                    "Run the bash command: ./foo.sh --help",  # 用户初始请求
-                    "tool_call",  # 第一个 LLM 的 tool_call
-                    "tool_call_response",  # 第一个工具的结果
-                ],
-                "gen_ai.output.messages": [
-                    "doesn't exist",  # 输出文本内容
-                    "tool_call",  # 第二个 tool_call
-                ],
-                "gen_ai.response.finish_reasons": ["stop"],
-            },
-        },
-        {
-            "name_pattern": "execute_tool",
-            "operation": "execute_tool",
-            "parent": "invoke_agent",  # Agent span 的子 span
-            "attributes": {
-                "gen_ai.operation.name": "execute_tool",
-                "gen_ai.tool.name": "Bash",
-                "gen_ai.tool.call.id": "call_2066d7694fd9488096cd3e",
-            },
-            "attributes_contains": {
-                "gen_ai.tool.call.arguments": "ls -la",  # 包含命令
-                "gen_ai.tool.call.result": "total",  # 工具结果（包含 ls 输出）
-            },
-            "is_error": False,  # 第二个工具调用成功
-        },
-        {
-            "name_pattern": "chat",
-            "operation": "chat",
-            "parent": "invoke_agent",  # Agent span 的子 span
-            "attributes": {
-                "gen_ai.operation.name": "chat",
-                "gen_ai.request.model": "qwen-plus",
-            },
-            "attributes_contains": {
-                "gen_ai.input.messages": [
-                    "Run the bash command: ./foo.sh --help",  # 用户初始请求
-                    "tool_call",  # 历史 tool_call
-                    "tool_call_response",  # 历史工具结果
-                    "ls -la",  # 第二个工具的参数
-                ],
-                "gen_ai.output.messages": [
-                    "no `foo.sh` script",  # 输出文本内容
-                ],
-                "gen_ai.response.finish_reasons": ["stop"],
-            },
-            "message_structure": {
-                "gen_ai.input.messages": {
-                    "roles": ["user", "assistant", "tool", "assistant", "tool"],
-                    "parts_types": [
-                        ["text"],  # user message
-                        ["text", "tool_call"],  # assistant message 1
-                        ["tool_call_response"],  # tool response 1
-                        ["text", "tool_call"],  # assistant message 2
-                        ["tool_call_response"],  # tool response 2
-                    ],
-                }
-            },
-        },
-    ],
-}
-
-
-# ============================================================================
-# 实际测试函数
-# ============================================================================
-
-
-@pytest.mark.asyncio
-@pytest.mark.parametrize(
-    "test_case",
-    [
-        pytest.param(TEST_CASE_1_FOO_SH_COMMAND, id="foo_sh_command"),
-        pytest.param(TEST_CASE_2_ECHO_COMMAND, id="echo_command"),
-        pytest.param(TEST_CASE_3_PRETOOLUSE, id="pretooluse_hook"),
-    ],
-)
-async def test_process_agent_invocation_stream_with_real_messages(
-    instrument, span_exporter, tracer_provider, test_case
-):
-    """使用真实的 message 数据测试 _process_agent_invocation_stream 函数。
-    
-    这个测试验证：
-    1. 函数能够正确处理所有类型的 message（SystemMessage, AssistantMessage, UserMessage, ResultMessage）
-    2. 消息能够正确 yield 出来
-    3. 不会抛出异常
-    4. 消息数量和顺序正确
-    
-    注意：这个测试只验证消息流处理逻辑，不验证 span 生成（那是上层函数的职责）
-    """
-    # 导入需要测试的函数
-    from opentelemetry.instrumentation.claude_agent_sdk.patch import (  # noqa: PLC0415
-        _process_agent_invocation_stream,
-    )
-    from opentelemetry.util.genai.extended_handler import (  # noqa: PLC0415
-        ExtendedTelemetryHandler,
-    )
-
-    # 创建 handler（使用 tracer_provider）
-    handler = ExtendedTelemetryHandler(tracer_provider=tracer_provider)
-
-    # 创建模拟的 stream
-    mock_stream = create_mock_stream_from_messages(test_case["messages"])
-
-    # 处理 stream 并收集所有消息
-    received_messages = []
-    async for message in _process_agent_invocation_stream(
-        wrapped_stream=mock_stream,
-        handler=handler,
-        model=test_case["messages"][0]["data"]["model"],  # 从 SystemMessage 获取 model
-        prompt=test_case["prompt"],
-    ):
-        received_messages.append(message)
-
-    # 验证：收到的消息数量应该等于输入消息数量
-    assert len(received_messages) == len(test_case["messages"]), (
-        f"期望收到 {len(test_case['messages'])} 条消息，"
-        f"实际收到 {len(received_messages)} 条"
-    )
-
-    # 验证：每条消息的类型应该正确
-    for i, (received, expected_data) in enumerate(
-        zip(received_messages, test_case["messages"])
-    ):
-        assert received.__class__.__name__ == expected_data["type"], (
-            f"第 {i} 条消息类型不匹配：期望 {expected_data['type']}，"
-            f"实际 {received.__class__.__name__}"
-        )
-
-    # 打印测试结果摘要
-    print(f"\n{'='*60}")
-    print(f"测试用例: {test_case['description']}")
-    print(f"消息总数: {len(received_messages)}")
-    print(f"消息类型:")
-    for i, msg in enumerate(received_messages):
-        print(f"  {i+1}. {msg.__class__.__name__}")
-    print(f"{'='*60}\n")
-
-
-@pytest.mark.asyncio
-async def test_foo_sh_command_detailed_validation(instrument, span_exporter, tracer_provider):
-    """详细验证 foo.sh 命令示例的消息流处理。
-    
-    这个测试针对 TEST_CASE_1_FOO_SH_COMMAND 进行详细验证：
-    - SystemMessage 的 data 字段
-    - AssistantMessage 的 content 块
-    - ToolUseBlock 的 id、name、input
-    - ToolResultBlock 的 is_error 字段
-    - ResultMessage 的 usage 统计
-    """
-    from opentelemetry.instrumentation.claude_agent_sdk.patch import (  # noqa: PLC0415
-        _process_agent_invocation_stream,
-    )
-    from opentelemetry.util.genai.extended_handler import (  # noqa: PLC0415
-        ExtendedTelemetryHandler,
-    )
-
-    handler = ExtendedTelemetryHandler(tracer_provider=tracer_provider)
-
-    mock_stream = create_mock_stream_from_messages(TEST_CASE_1_FOO_SH_COMMAND["messages"])
-    received_messages = []
-    
-    async for message in _process_agent_invocation_stream(
-        wrapped_stream=mock_stream,
-        handler=handler,
-        model="qwen-plus",
-        prompt=TEST_CASE_1_FOO_SH_COMMAND["prompt"],
-    ):
-        received_messages.append(message)
-
-    # 验证 SystemMessage
-    system_msg = received_messages[0]
-    assert system_msg.__class__.__name__ == "SystemMessage"
-    assert system_msg.subtype == "init"
-    assert system_msg.data["model"] == "qwen-plus"
-    assert "Bash" in system_msg.data["tools"]
-
-    # 验证第一个 ToolUseBlock (foo.sh --help)
-    tool_use_msg_1 = received_messages[2]  # 第3条消息
-    assert tool_use_msg_1.__class__.__name__ == "AssistantMessage"
-    assert len(tool_use_msg_1.content) == 1
-    tool_block_1 = tool_use_msg_1.content[0]
-    assert tool_block_1.__class__.__name__ == "ToolUseBlock"
-    assert tool_block_1.name == "Bash"
-    assert tool_block_1.id == "call_33776c3031c148f6a02f71"
-    assert tool_block_1.input["command"] == "./foo.sh --help"
-
-    # 验证第一个 ToolResultBlock (错误结果)
-    tool_result_msg_1 = received_messages[3]  # 第4条消息
-    assert tool_result_msg_1.__class__.__name__ == "UserMessage"
-    result_block_1 = tool_result_msg_1.content[0]
-    assert result_block_1.__class__.__name__ == "ToolResultBlock"
-    assert result_block_1.is_error is True  # 这是关键：错误场景
-    assert "invalid pattern" in result_block_1.content
-
-    # 验证第二个 ToolUseBlock (ls -la)
-    tool_use_msg_2 = received_messages[5]  # 第6条消息
-    tool_block_2 = tool_use_msg_2.content[0]
-    assert tool_block_2.name == "Bash"
-    assert tool_block_2.id == "call_2066d7694fd9488096cd3e"
-    assert tool_block_2.input["command"] == "ls -la"
-
-    # 验证第二个 ToolResultBlock (成功结果)
-    tool_result_msg_2 = received_messages[6]  # 第7条消息
-    result_block_2 = tool_result_msg_2.content[0]
-    assert result_block_2.is_error is False  # 成功
-
-    # 验证 ResultMessage
-    result_msg = received_messages[-1]
-    assert result_msg.__class__.__name__ == "ResultMessage"
-    assert result_msg.num_turns == 3
-    assert result_msg.is_error is False  # 整体成功
-    assert result_msg.usage["input_tokens"] == 41647
-    assert result_msg.usage["output_tokens"] == 284
-    assert result_msg.total_cost_usd == 0.129201
-
-    print(f"\n✅ foo.sh 命令示例详细验证通过")
-    print(f"  - 包含 2 个工具调用 (1个失败 + 1个成功)")
-    print(f"  - 总共 {result_msg.num_turns} 轮对话")
-    print(f"  - Input tokens: {result_msg.usage['input_tokens']}")
-    print(f"  - Output tokens: {result_msg.usage['output_tokens']}")
-    print(f"  - Cost: ${result_msg.total_cost_usd}\n")
-
-
-@pytest.mark.asyncio
-async def test_echo_command_detailed_validation(instrument, span_exporter, tracer_provider):
-    """详细验证 echo 命令示例的消息流处理。
-    
-    这个测试针对 TEST_CASE_2_ECHO_COMMAND 进行详细验证，
-    这是一个简单的成功场景（单工具调用，无错误）。
-    """
-    from opentelemetry.instrumentation.claude_agent_sdk.patch import (  # noqa: PLC0415
-        _process_agent_invocation_stream,
-    )
-    from opentelemetry.util.genai.extended_handler import (  # noqa: PLC0415
-        ExtendedTelemetryHandler,
-    )
-
-    handler = ExtendedTelemetryHandler(tracer_provider=tracer_provider)
-
-    mock_stream = create_mock_stream_from_messages(TEST_CASE_2_ECHO_COMMAND["messages"])
-    received_messages = []
-    
-    async for message in _process_agent_invocation_stream(
-        wrapped_stream=mock_stream,
-        handler=handler,
-        model="qwen-plus",
-        prompt=TEST_CASE_2_ECHO_COMMAND["prompt"],
-    ):
-        received_messages.append(message)
-
-    # 验证消息数量（应该比 foo.sh 示例少，因为只有1个工具调用）
-    assert len(received_messages) == 6
-
-    # 验证 SystemMessage
-    system_msg = received_messages[0]
-    assert system_msg.__class__.__name__ == "SystemMessage"
-
-    # 验证 ToolUseBlock
-    tool_use_msg = received_messages[2]
-    tool_block = tool_use_msg.content[0]
-    assert tool_block.name == "Bash"
-    assert tool_block.id == "call_601533b186c2423dad58e8"
-    assert "echo" in tool_block.input["command"]
-
-    # 验证 ToolResultBlock (应该是成功的)
-    tool_result_msg = received_messages[3]
-    result_block = tool_result_msg.content[0]
-    assert result_block.is_error is False
-    assert "Hello from hooks example" in result_block.content
-
-    # 验证 ResultMessage
-    result_msg = received_messages[-1]
-    assert result_msg.num_turns == 2  # 比 foo.sh 少1轮
-    assert result_msg.is_error is False
-    assert result_msg.total_cost_usd == 0.221802
-
-    print(f"\n✅ echo 命令示例详细验证通过")
-    print(f"  - 包含 1 个工具调用 (成功)")
-    print(f"  - 总共 {result_msg.num_turns} 轮对话")
-    print(f"  - Input tokens: {result_msg.usage['input_tokens']}")
-    print(f"  - Output tokens: {result_msg.usage['output_tokens']}")
-    print(f"  - Cost: ${result_msg.total_cost_usd}\n")
-
-
-@pytest.mark.asyncio
-async def test_message_stream_yields_all_messages_correctly(instrument, tracer_provider):
-    """测试消息流是否按顺序正确 yield 所有消息。
-    
-    这个测试验证 _process_agent_invocation_stream 函数的核心功能：
-    作为一个 async generator，它应该按顺序 yield 所有输入消息。
-    """
-    from opentelemetry.instrumentation.claude_agent_sdk.patch import (  # noqa: PLC0415
-        _process_agent_invocation_stream,
-    )
-    from opentelemetry.util.genai.extended_handler import (  # noqa: PLC0415
-        ExtendedTelemetryHandler,
-    )
-
-    handler = ExtendedTelemetryHandler(tracer_provider=tracer_provider)
-
-    # 测试三个用例
-    for test_case in [TEST_CASE_1_FOO_SH_COMMAND, TEST_CASE_2_ECHO_COMMAND, TEST_CASE_3_PRETOOLUSE]:
-        mock_stream = create_mock_stream_from_messages(test_case["messages"])
-        received_messages = []
-        
-        async for message in _process_agent_invocation_stream(
-            wrapped_stream=mock_stream,
-            handler=handler,
-            model="qwen-plus",
-            prompt=test_case["prompt"],
-        ):
-            received_messages.append(message)
-
-        # 验证消息顺序
-        for i, (received, expected_data) in enumerate(
-            zip(received_messages, test_case["messages"])
-        ):
-            assert received.__class__.__name__ == expected_data["type"], (
-                f"测试用例 '{test_case['description']}': "
-                f"第 {i} 条消息类型不匹配"
-            )
-
-    print("\n✅ 所有消息都按正确顺序 yield")
-
-
-# ============================================================================
-# Span 验证测试：验证 Agent、LLM、Tool spans 的正确性
-# ============================================================================
-
-
-def find_span_by_name(spans, name_pattern: str):
-    """查找包含指定名称模式的 span。"""
-    return [span for span in spans if name_pattern in span.name]
-
-
-def find_agent_span(spans):
-    """查找 Agent span。"""
-    agent_spans = find_span_by_name(spans, "invoke_agent")
-    if not agent_spans:
-        agent_spans = find_span_by_name(spans, "Agent")
-    return agent_spans[0] if agent_spans else None
-
-
-def find_llm_spans(spans):
-    """查找所有 LLM spans。"""
-    llm_spans = find_span_by_name(spans, "chat")
-    if not llm_spans:
-        llm_spans = find_span_by_name(spans, "LLM")
-    return llm_spans
-
-
-def find_tool_spans(spans):
-    """查找所有 Tool spans。"""
-    tool_spans = find_span_by_name(spans, "execute_tool")
-    if not tool_spans:
-        tool_spans = find_span_by_name(spans, "Tool")
-    return tool_spans
-
-
-@pytest.mark.asyncio
-async def test_agent_span_correctness(instrument, span_exporter, tracer_provider):
-    """验证 Agent span 的正确性。
-    
-    验证内容：
-    1. Agent span 存在且是根 span
-    2. Agent span 的名称正确
-    3. Agent span 的属性正确（system, model, operation.name, prompt 等）
-    4. Agent span 包含正确的 token 统计和 cost
-    """
-    from opentelemetry.instrumentation.claude_agent_sdk.patch import (  # noqa: PLC0415
-        _process_agent_invocation_stream,
-    )
-    from opentelemetry.semconv._incubating.attributes import (  # noqa: PLC0415
-        gen_ai_attributes as GenAIAttributes,
-    )
-    from opentelemetry.util.genai.extended_handler import (  # noqa: PLC0415
-        ExtendedTelemetryHandler,
-    )
-
-    handler = ExtendedTelemetryHandler(tracer_provider=tracer_provider)
-    test_case = TEST_CASE_1_FOO_SH_COMMAND
-
-    mock_stream = create_mock_stream_from_messages(test_case["messages"])
-    
-    async for _ in _process_agent_invocation_stream(
-        wrapped_stream=mock_stream,
-        handler=handler,
-        model="qwen-plus",
-        prompt=test_case["prompt"],
-    ):
-        pass
-
-    spans = span_exporter.get_finished_spans()
-    agent_span = find_agent_span(spans)
-
-    # 验证 Agent span 存在
-    assert agent_span is not None, "应该有一个 Agent span"
-
-    # 验证 Agent span 是根 span（没有 parent）
-    assert agent_span.parent is None, "Agent span 应该是根 span，没有 parent"
-
-    # 验证 Agent span 名称
-    assert "invoke_agent" in agent_span.name or "Agent" in agent_span.name, (
-        f"Agent span 名称应该包含 'invoke_agent' 或 'Agent'，实际: {agent_span.name}"
-    )
-
-    attrs = dict(agent_span.attributes or {})
-
-    # 验证基本属性
-    assert GenAIAttributes.GEN_AI_OPERATION_NAME in attrs, "Agent span 应该有 gen_ai.operation.name 属性"
-    assert attrs[GenAIAttributes.GEN_AI_OPERATION_NAME] == "invoke_agent", (
-        f"gen_ai.operation.name 应该是 'invoke_agent'，实际: {attrs.get(GenAIAttributes.GEN_AI_OPERATION_NAME)}"
-    )
-
-    # 验证 agent.name（如果存在）
-    if GenAIAttributes.GEN_AI_AGENT_NAME in attrs:
-        assert attrs[GenAIAttributes.GEN_AI_AGENT_NAME] == "claude-agent", (
-            f"gen_ai.agent.name 应该是 'claude-agent'，实际: {attrs.get(GenAIAttributes.GEN_AI_AGENT_NAME)}"
-        )
-
-        # 验证 input.messages（如果存在）
-        if GenAIAttributes.GEN_AI_INPUT_MESSAGES in attrs:
-            input_messages = attrs[GenAIAttributes.GEN_AI_INPUT_MESSAGES]
-            assert test_case["prompt"] in str(input_messages), (
-                f"gen_ai.input.messages 应该包含用户查询，实际: {input_messages}"
-            )
-
-    # 验证 token 统计（从 ResultMessage）
-    result_msg = test_case["messages"][-1]
-    if result_msg["type"] == "ResultMessage":
-        usage = result_msg["usage"]
-        if GenAIAttributes.GEN_AI_USAGE_INPUT_TOKENS in attrs:
-            assert attrs[GenAIAttributes.GEN_AI_USAGE_INPUT_TOKENS] == usage["input_tokens"], (
-                f"gen_ai.usage.input_tokens 应该是 {usage['input_tokens']}，"
-                f"实际: {attrs.get(GenAIAttributes.GEN_AI_USAGE_INPUT_TOKENS)}"
-            )
-        if GenAIAttributes.GEN_AI_USAGE_OUTPUT_TOKENS in attrs:
-            assert attrs[GenAIAttributes.GEN_AI_USAGE_OUTPUT_TOKENS] == usage["output_tokens"], (
-                f"gen_ai.usage.output_tokens 应该是 {usage['output_tokens']}，"
-                f"实际: {attrs.get(GenAIAttributes.GEN_AI_USAGE_OUTPUT_TOKENS)}"
-            )
-
-    # 验证 conversation_id（如果存在）
-    if "session_id" in result_msg and result_msg.get("session_id"):
-        if GenAIAttributes.GEN_AI_CONVERSATION_ID in attrs:
-            assert attrs[GenAIAttributes.GEN_AI_CONVERSATION_ID] == result_msg["session_id"], (
-                f"gen_ai.conversation.id 应该是 {result_msg['session_id']}，"
-                f"实际: {attrs.get(GenAIAttributes.GEN_AI_CONVERSATION_ID)}"
-            )
-
-    print(f"\n✅ Agent span 验证通过")
-    print(f"  - 名称: {agent_span.name}")
-    print(f"  - Operation: {attrs.get(GenAIAttributes.GEN_AI_OPERATION_NAME)}")
-    if GenAIAttributes.GEN_AI_AGENT_NAME in attrs:
-        print(f"  - Agent name: {attrs.get(GenAIAttributes.GEN_AI_AGENT_NAME)}")
-    if GenAIAttributes.GEN_AI_CONVERSATION_ID in attrs:
-        print(f"  - Conversation ID: {attrs.get(GenAIAttributes.GEN_AI_CONVERSATION_ID)}")
-    if GenAIAttributes.GEN_AI_USAGE_INPUT_TOKENS in attrs:
-        print(f"  - Input tokens: {attrs[GenAIAttributes.GEN_AI_USAGE_INPUT_TOKENS]}")
-    if GenAIAttributes.GEN_AI_USAGE_OUTPUT_TOKENS in attrs:
-        print(f"  - Output tokens: {attrs[GenAIAttributes.GEN_AI_USAGE_OUTPUT_TOKENS]}")
-
-
-@pytest.mark.asyncio
-async def test_llm_span_correctness(instrument, span_exporter, tracer_provider):
-    """验证 LLM span 的正确性。
-    
-    验证内容：
-    1. LLM span 存在
-    2. LLM span 是 Agent span 的子 span
-    3. LLM span 的属性正确（provider, model, operation.name 等）
-    4. LLM span 包含正确的 input_messages 和 output_messages
-    """
-    from opentelemetry.instrumentation.claude_agent_sdk.patch import (  # noqa: PLC0415
-        _process_agent_invocation_stream,
-    )
-    from opentelemetry.semconv._incubating.attributes import (  # noqa: PLC0415
-        gen_ai_attributes as GenAIAttributes,
-    )
-    from opentelemetry.util.genai.extended_handler import (  # noqa: PLC0415
-        ExtendedTelemetryHandler,
-    )
-
-    handler = ExtendedTelemetryHandler(tracer_provider=tracer_provider)
-    test_case = TEST_CASE_1_FOO_SH_COMMAND
-
-    mock_stream = create_mock_stream_from_messages(test_case["messages"])
-    
-    async for _ in _process_agent_invocation_stream(
-        wrapped_stream=mock_stream,
-        handler=handler,
-        model="qwen-plus",
-        prompt=test_case["prompt"],
-    ):
-        pass
-
-    spans = span_exporter.get_finished_spans()
-    agent_span = find_agent_span(spans)
-    llm_spans = find_llm_spans(spans)
-
-    # 验证 LLM span 存在
-    assert len(llm_spans) > 0, f"应该有至少一个 LLM span，实际: {len(llm_spans)}"
-
-    # 验证每个 LLM span
-    for i, llm_span in enumerate(llm_spans):
-        attrs = dict(llm_span.attributes or {})
-
-        # 验证 LLM span 是 Agent span 的子 span
-        assert llm_span.parent is not None, "LLM span 应该有 parent"
-        assert llm_span.parent.span_id == agent_span.context.span_id, (
-            f"LLM span 的 parent 应该是 Agent span，"
-            f"实际 parent span_id: {llm_span.parent.span_id}, "
-            f"Agent span_id: {agent_span.context.span_id}"
-        )
-
-        # 验证基本属性
-        assert GenAIAttributes.GEN_AI_OPERATION_NAME in attrs, "LLM span 应该有 gen_ai.operation.name 属性"
-        assert attrs[GenAIAttributes.GEN_AI_OPERATION_NAME] == "chat", (
-            f"gen_ai.operation.name 应该是 'chat'，实际: {attrs.get(GenAIAttributes.GEN_AI_OPERATION_NAME)}"
-        )
-
-        assert GenAIAttributes.GEN_AI_PROVIDER_NAME in attrs, "LLM span 应该有 gen_ai.provider.name 属性"
-        # Provider 可能是 "anthropic" 或其他值，取决于 infer_provider_from_base_url()
-
-        assert GenAIAttributes.GEN_AI_REQUEST_MODEL in attrs, "LLM span 应该有 gen_ai.request.model 属性"
-        assert attrs[GenAIAttributes.GEN_AI_REQUEST_MODEL] == "qwen-plus", (
-            f"gen_ai.request.model 应该是 'qwen-plus'，实际: {attrs.get(GenAIAttributes.GEN_AI_REQUEST_MODEL)}"
-        )
-
-        # 验证 input_messages（如果存在）
-        if GenAIAttributes.GEN_AI_INPUT_MESSAGES in attrs:
-            input_messages = attrs[GenAIAttributes.GEN_AI_INPUT_MESSAGES]
-            # input_messages 应该包含 prompt
-            assert test_case["prompt"] in str(input_messages), (
-                f"gen_ai.input.messages 应该包含用户查询，实际: {input_messages}"
-            )
-
-            # 验证 output_messages（如果存在）
-            if GenAIAttributes.GEN_AI_OUTPUT_MESSAGES in attrs:
-                output_messages_raw = attrs[GenAIAttributes.GEN_AI_OUTPUT_MESSAGES]
-                assert output_messages_raw is not None, "gen_ai.output.messages 不应该为空"
-                
-                # 解析 output_messages（可能是 JSON 字符串或字典列表）
-                import json
-                if isinstance(output_messages_raw, str):
-                    try:
-                        output_messages = json.loads(output_messages_raw)
-                    except json.JSONDecodeError:
-                        output_messages = output_messages_raw
-                else:
-                    output_messages = output_messages_raw
-                
-                # 检查是否有重复的 tool_call（每个 tool_call.id 应该是唯一的）
-                if isinstance(output_messages, list):
-                    tool_call_ids = []
-                    for msg in output_messages:
-                        if isinstance(msg, dict) and msg.get("role") == "assistant":
-                            parts = msg.get("parts", [])
-                            for part in parts:
-                                if isinstance(part, dict) and part.get("type") == "tool_call":
-                                    tool_call_id = part.get("id")
-                                    if tool_call_id:
-                                        assert tool_call_id not in tool_call_ids, (
-                                            f"发现重复的 tool_call ID: {tool_call_id} in LLM span {i+1}. "
-                                            f"这表明同一个 tool_call 被记录了多次！"
-                                        )
-                                        tool_call_ids.append(tool_call_id)
-
-    print(f"\n✅ LLM span 验证通过")
-    print(f"  - LLM span 数量: {len(llm_spans)}")
-
-
-@pytest.mark.asyncio
-async def test_tool_span_correctness(instrument, span_exporter, tracer_provider):
-    """验证 Tool span 的正确性。
-    
-    验证内容：
-    1. Tool span 存在且数量正确
-    2. Tool span 是 Agent span 的子 span（不是 LLM span 的子 span）
-    3. Tool span 的属性正确（tool.name, tool.use.id, tool.input 等）
-    4. Tool span 包含正确的 output 和 is_error 状态
-    """
-    from opentelemetry.instrumentation.claude_agent_sdk.patch import (  # noqa: PLC0415
-        _process_agent_invocation_stream,
-    )
-    from opentelemetry.semconv._incubating.attributes import (  # noqa: PLC0415
-        gen_ai_attributes as GenAIAttributes,
-    )
-    from opentelemetry.util.genai.extended_handler import (  # noqa: PLC0415
-        ExtendedTelemetryHandler,
-    )
-
-    handler = ExtendedTelemetryHandler(tracer_provider=tracer_provider)
-    test_case = TEST_CASE_1_FOO_SH_COMMAND
-
-    mock_stream = create_mock_stream_from_messages(test_case["messages"])
-    
-    async for _ in _process_agent_invocation_stream(
-        wrapped_stream=mock_stream,
-        handler=handler,
-        model="qwen-plus",
-        prompt=test_case["prompt"],
-    ):
-        pass
-
-    spans = span_exporter.get_finished_spans()
-    agent_span = find_agent_span(spans)
-    tool_spans = find_tool_spans(spans)
-
-    # 验证 Tool span 存在且数量正确
-    # TEST_CASE_1 应该有 2 个 Tool spans（2 个 Bash 调用）
-    expected_tool_count = 2
-    assert len(tool_spans) == expected_tool_count, (
-        f"应该有 {expected_tool_count} 个 Tool span，实际: {len(tool_spans)}"
-    )
-
-    # 从测试数据中提取工具调用信息
-    tool_calls = []
-    for msg_data in test_case["messages"]:
-        if msg_data["type"] == "AssistantMessage":
-            for block in msg_data["content"]:
-                if block["type"] == "ToolUseBlock":
-                    tool_calls.append({
-                        "id": block["id"],
-                        "name": block["name"],
-                        "input": block["input"],
-                    })
-
-    # 从测试数据中提取工具结果信息
-    tool_results = {}
-    for msg_data in test_case["messages"]:
-        if msg_data["type"] == "UserMessage":
-            for block in msg_data["content"]:
-                if block["type"] == "ToolResultBlock":
-                    tool_results[block["tool_use_id"]] = {
-                        "content": block["content"],
-                        "is_error": block["is_error"],
-                    }
-
-    # 验证每个 Tool span
-    for tool_span in tool_spans:
-        attrs = dict(tool_span.attributes or {})
-
-        # 验证 Tool span 是 Agent span 的子 span（不是 LLM span 的子 span）
-        assert tool_span.parent is not None, "Tool span 应该有 parent"
-        assert tool_span.parent.span_id == agent_span.context.span_id, (
-            f"Tool span 的 parent 应该是 Agent span，"
-            f"实际 parent span_id: {tool_span.parent.span_id}, "
-            f"Agent span_id: {agent_span.context.span_id}"
-        )
-
-        # 验证基本属性
-        assert GenAIAttributes.GEN_AI_OPERATION_NAME in attrs, "Tool span 应该有 gen_ai.operation.name 属性"
-        assert attrs[GenAIAttributes.GEN_AI_OPERATION_NAME] == "execute_tool", (
-            f"gen_ai.operation.name 应该是 'execute_tool'，实际: {attrs.get(GenAIAttributes.GEN_AI_OPERATION_NAME)}"
-        )
-
-        assert GenAIAttributes.GEN_AI_TOOL_NAME in attrs, "Tool span 应该有 gen_ai.tool.name 属性"
-        tool_name = attrs[GenAIAttributes.GEN_AI_TOOL_NAME]
-        assert tool_name == "Bash", (
-            f"gen_ai.tool.name 应该是 'Bash'，实际: {tool_name}"
-        )
-
-        assert GenAIAttributes.GEN_AI_TOOL_CALL_ID in attrs, "Tool span 应该有 gen_ai.tool.call.id 属性"
-        tool_use_id = attrs[GenAIAttributes.GEN_AI_TOOL_CALL_ID]
-
-        # 验证 tool_use_id 在测试数据中存在
-        matching_tool_call = next(
-            (tc for tc in tool_calls if tc["id"] == tool_use_id),
-            None
-        )
-        assert matching_tool_call is not None, (
-            f"Tool span 的 tool_use_id '{tool_use_id}' 应该在测试数据中存在"
-        )
-
-        # 验证 tool.input（检查所有可能的属性名称）
-        tool_input_attrs = [
-            "gen_ai.tool.input",
-            "gen_ai.tool.call.arguments",
-            "gen_ai.tool.arguments",
-        ]
-        tool_input_found = False
-        for attr_name in tool_input_attrs:
-            if attr_name in attrs:
-                tool_input = attrs[attr_name]
-                expected_input = matching_tool_call["input"]
-                # tool_input 可能是 JSON 字符串，需要解析比较
-                import json
-                if isinstance(tool_input, str):
-                    try:
-                        tool_input_dict = json.loads(tool_input)
-                        assert str(expected_input["command"]) in str(tool_input_dict), (
-                            f"{attr_name} 应该包含命令，实际: {tool_input_dict}"
-                        )
-                        tool_input_found = True
-                        break
-                    except json.JSONDecodeError:
-                        # 如果不是 JSON，直接比较字符串
-                        if str(expected_input["command"]) in tool_input:
-                            tool_input_found = True
-                            break
-                elif isinstance(tool_input, dict):
-                    assert str(expected_input["command"]) in str(tool_input), (
-                        f"{attr_name} 应该包含命令，实际: {tool_input}"
-                    )
-                    tool_input_found = True
-                    break
-
-        # 验证 tool.output 和 is_error（如果存在）
-        if tool_use_id in tool_results:
-            tool_result = tool_results[tool_use_id]
-            
-            # 检查所有可能的 output 属性名称
-            tool_output_attrs = [
-                "gen_ai.tool.output",
-                "gen_ai.tool.result",
-                "gen_ai.tool.response",
-            ]
-            for attr_name in tool_output_attrs:
-                if attr_name in attrs:
-                    tool_output = attrs[attr_name]
-                    # tool_output 应该包含结果内容
-                    assert tool_result["content"] in str(tool_output), (
-                        f"{attr_name} 应该包含工具结果，实际: {tool_output}"
-                    )
-                    break
-
-            # 验证 is_error 状态
-            if tool_result["is_error"]:
-                # 如果工具调用失败，span 应该有错误状态（ERROR = 2）
-                assert tool_span.status.status_code.value in [2], (
-                    f"失败的 Tool span 应该有错误状态（ERROR），实际: {tool_span.status.status_code}"
-                )
-                # 验证错误属性
-                if "error.type" in attrs:
-                    assert attrs["error.type"] is not None, "失败的 Tool span 应该有 error.type 属性"
-            else:
-                # 如果工具调用成功，span 应该是 OK 状态（OK = 1）或 UNSET（UNSET = 0，表示没有错误）
-                assert tool_span.status.status_code.value in [0, 1], (
-                    f"成功的 Tool span 应该是 OK 或 UNSET 状态，实际: {tool_span.status.status_code}"
-                )
-
-    print(f"\n✅ Tool span 验证通过")
-    print(f"  - Tool span 数量: {len(tool_spans)}")
-    for i, tool_span in enumerate(tool_spans):
-        attrs = dict(tool_span.attributes or {})
-        tool_use_id = attrs.get(GenAIAttributes.GEN_AI_TOOL_CALL_ID)
-        tool_result = tool_results.get(tool_use_id, {})
-        print(f"  - Tool span {i+1}:")
-        print(f"    - 名称: {tool_span.name}")
-        print(f"    - Tool name: {attrs.get(GenAIAttributes.GEN_AI_TOOL_NAME)}")
-        print(f"    - Tool call ID: {tool_use_id}")
-        print(f"    - Is error: {tool_result.get('is_error', False)}")
-        print(f"    - Status: {tool_span.status.status_code}")
-
-
-@pytest.mark.asyncio
-async def test_span_hierarchy_correctness(instrument, span_exporter, tracer_provider):
-    """验证 span 层次结构的正确性。
-    
-    验证内容：
-    1. Agent span 是根 span
-    2. LLM span 是 Agent span 的子 span
-    3. Tool span 是 Agent span 的子 span（不是 LLM span 的子 span）
-    4. 所有 spans 的层次结构正确
-    """
-    from opentelemetry.instrumentation.claude_agent_sdk.patch import (  # noqa: PLC0415
-        _process_agent_invocation_stream,
-    )
-    from opentelemetry.util.genai.extended_handler import (  # noqa: PLC0415
-        ExtendedTelemetryHandler,
-    )
-
-    handler = ExtendedTelemetryHandler(tracer_provider=tracer_provider)
-    test_case = TEST_CASE_1_FOO_SH_COMMAND
-
-    mock_stream = create_mock_stream_from_messages(test_case["messages"])
-    
-    async for _ in _process_agent_invocation_stream(
-        wrapped_stream=mock_stream,
-        handler=handler,
-        model="qwen-plus",
-        prompt=test_case["prompt"],
-    ):
-        pass
-
-    spans = span_exporter.get_finished_spans()
-    agent_span = find_agent_span(spans)
-    llm_spans = find_llm_spans(spans)
-    tool_spans = find_tool_spans(spans)
-
-    # 验证 Agent span 是根 span
-    assert agent_span is not None, "应该有 Agent span"
-    assert agent_span.parent is None, "Agent span 应该是根 span，没有 parent"
-
-    # 验证 LLM span 是 Agent span 的子 span
-    assert len(llm_spans) > 0, "应该有至少一个 LLM span"
-    for llm_span in llm_spans:
-        assert llm_span.parent is not None, "LLM span 应该有 parent"
-        assert llm_span.parent.span_id == agent_span.context.span_id, (
-            f"LLM span 的 parent 应该是 Agent span"
-        )
-
-    # 验证 Tool span 是 Agent span 的子 span（不是 LLM span 的子 span）
-    assert len(tool_spans) > 0, "应该有至少一个 Tool span"
-    for tool_span in tool_spans:
-        assert tool_span.parent is not None, "Tool span 应该有 parent"
-        assert tool_span.parent.span_id == agent_span.context.span_id, (
-            f"Tool span 的 parent 应该是 Agent span，不是 LLM span"
-        )
-        # 确保 Tool span 不是 LLM span 的子 span
-        for llm_span in llm_spans:
-            assert tool_span.parent.span_id != llm_span.context.span_id, (
-                f"Tool span 不应该是 LLM span 的子 span"
-            )
-
-    print(f"\n✅ Span 层次结构验证通过")
-    print(f"  - Agent span: {agent_span.name} (根 span)")
-    print(f"  - LLM spans: {len(llm_spans)} 个（Agent 的子 span）")
-    print(f"  - Tool spans: {len(tool_spans)} 个（Agent 的子 span）")
-
-
-def validate_message_structure(messages, expected_structure):
-    """严格验证 messages 的结构是否符合期望。
-    
-    Args:
-        messages: 实际的 messages（JSON 字符串或列表）
-        expected_structure: 期望的结构定义
-        
-    Returns:
-        (bool, str): (是否匹配, 错误消息)
-    """
-    import json
-    
-    # 解析 messages
-    if isinstance(messages, str):
-        try:
-            messages = json.loads(messages)
-        except json.JSONDecodeError as e:
-            return False, f"JSON 解析失败: {e}"
-    
-    if not isinstance(messages, list):
-        return False, f"messages 应该是列表，实际: {type(messages)}"
-    
-    # 验证结构
-    if "roles" in expected_structure:
-        expected_roles = expected_structure["roles"]
-        actual_roles = [msg.get("role") for msg in messages if isinstance(msg, dict)]
-        if actual_roles != expected_roles:
-            return False, (
-                f"Role 顺序不匹配:\n"
-                f"  期望: {expected_roles}\n"
-                f"  实际: {actual_roles}"
-            )
-    
-    # 验证每个 message 的 parts 类型
-    if "parts_types" in expected_structure:
-        expected_parts_types = expected_structure["parts_types"]
-        for i, (msg, expected_types) in enumerate(zip(messages, expected_parts_types)):
-            if not isinstance(msg, dict):
-                continue
-            parts = msg.get("parts", [])
-            actual_types = [part.get("type") for part in parts if isinstance(part, dict)]
-            if actual_types != expected_types:
-                return False, (
-                    f"Message {i+1} 的 parts 类型不匹配:\n"
-                    f"  期望: {expected_types}\n"
-                    f"  实际: {actual_types}"
-                )
-    
-    return True, ""
-
-
-def match_span_to_expected(span, expected_span_def, parent_map):
-    """匹配实际 span 与期望的 span 定义。
-    
-    Args:
-        span: 实际的 span 对象
-        expected_span_def: 期望的 span 定义字典
-        parent_map: 父 span 的映射 {operation: span}
-        
-    Returns:
-        (bool, str): (是否匹配, 错误消息)
-    """
-    from opentelemetry.semconv._incubating.attributes import (  # noqa: PLC0415
-        gen_ai_attributes as GenAIAttributes,
-    )
-    
-    attrs = dict(span.attributes or {})
-    
-    # 验证名称模式
-    if "name_pattern" in expected_span_def:
-        if expected_span_def["name_pattern"] not in span.name:
-            return False, f"Span 名称不匹配: 期望包含 '{expected_span_def['name_pattern']}', 实际: '{span.name}'"
-    
-    # 验证 operation
-    if "operation" in expected_span_def:
-        if GenAIAttributes.GEN_AI_OPERATION_NAME not in attrs:
-            return False, f"缺少 gen_ai.operation.name 属性"
-        if attrs[GenAIAttributes.GEN_AI_OPERATION_NAME] != expected_span_def["operation"]:
-            return False, (
-                f"gen_ai.operation.name 不匹配: "
-                f"期望 '{expected_span_def['operation']}', "
-                f"实际 '{attrs[GenAIAttributes.GEN_AI_OPERATION_NAME]}'"
-            )
-    
-    # 验证父 span
-    if "parent" in expected_span_def:
-        expected_parent = expected_span_def["parent"]
-        if expected_parent is None:
-            if span.parent is not None:
-                return False, f"期望根 span，但实际有 parent"
-        else:
-            if span.parent is None:
-                return False, f"期望 parent 是 '{expected_parent}'，但实际是根 span"
-            elif expected_parent in parent_map:
-                expected_parent_span = parent_map[expected_parent]
-                if span.parent.span_id != expected_parent_span.context.span_id:
-                    return False, (
-                        f"Parent span 不匹配: "
-                        f"期望 '{expected_parent}' (span_id={expected_parent_span.context.span_id}), "
-                        f"实际 span_id={span.parent.span_id}"
-                    )
-    
-    # 验证精确匹配的属性
-    if "attributes" in expected_span_def:
-        for attr_key, expected_value in expected_span_def["attributes"].items():
-            if attr_key not in attrs:
-                return False, f"缺少属性: {attr_key}"
-            if attrs[attr_key] != expected_value:
-                return False, (
-                    f"属性 {attr_key} 不匹配: "
-                    f"期望 {expected_value!r}, 实际 {attrs[attr_key]!r}"
-                )
-    
-    # 验证包含匹配的属性（值包含指定字符串或列表）
-    if "attributes_contains" in expected_span_def:
-        for attr_key, expected_value in expected_span_def["attributes_contains"].items():
-            if attr_key not in attrs:
-                return False, f"缺少属性: {attr_key}"
-            attr_value = attrs[attr_key]
-            
-            # 如果期望值是列表，验证每个元素
-            if isinstance(expected_value, list):
-                attr_value_str = str(attr_value)
-                for expected_item in expected_value:
-                    if expected_item not in attr_value_str:
-                        return False, (
-                            f"属性 {attr_key} 不包含期望内容: "
-                            f"期望包含 '{expected_item}', 实际: {attr_value_str[:200]}..."
-                        )
-            else:
-                # 字符串匹配
-                attr_value_str = str(attr_value)
-                if expected_value not in attr_value_str:
-                    return False, (
-                        f"属性 {attr_key} 不包含期望内容: "
-                        f"期望包含 '{expected_value}', 实际: {attr_value_str[:200]}..."
-                    )
-    
-    # 验证消息结构（如果定义了 message_structure）
-    if "message_structure" in expected_span_def:
-        for attr_key, expected_structure in expected_span_def["message_structure"].items():
-            if attr_key not in attrs:
-                return False, f"缺少属性: {attr_key}"
-            is_valid, error_msg = validate_message_structure(attrs[attr_key], expected_structure)
-            if not is_valid:
-                return False, f"属性 {attr_key} 结构验证失败: {error_msg}"
-    
-    # 验证错误状态
-    if "is_error" in expected_span_def:
-        expected_is_error = expected_span_def["is_error"]
-        actual_status = span.status.status_code.value
-        if expected_is_error:
-            # 期望错误状态（ERROR = 2）
-            if actual_status != 2:
-                return False, (
-                    f"期望错误状态，但实际状态码是 {actual_status}"
-                )
-        else:
-            # 期望成功状态（OK = 1 或 UNSET = 0）
-            if actual_status not in [0, 1]:
-                return False, (
-                    f"期望成功状态，但实际状态码是 {actual_status}"
-                )
-    
-    return True, ""
-
-
-@pytest.mark.asyncio
-@pytest.mark.parametrize(
-    "test_case",
-    [
-        pytest.param(TEST_CASE_1_FOO_SH_COMMAND, id="foo_sh_command"),
-        pytest.param(TEST_CASE_2_ECHO_COMMAND, id="echo_command"),
-        pytest.param(TEST_CASE_3_PRETOOLUSE, id="pretooluse_hook"),
-    ],
-)
-async def test_all_spans_match_expected(
-    instrument, span_exporter, tracer_provider, test_case
-):
-    """验证实际生成的 spans 与 expected_spans 完全匹配。
-    
-    这个测试验证：
-    1. 生成的 spans 数量与 expected_spans 一致
-    2. 每个 span 的名称、操作类型、父 span 都匹配
-    3. 每个 span 的属性都完全匹配 expected_spans 中的定义
-    4. Span 的层次结构正确
-    """
-    from opentelemetry.instrumentation.claude_agent_sdk.patch import (  # noqa: PLC0415
-        _process_agent_invocation_stream,
-    )
-    from opentelemetry.semconv._incubating.attributes import (  # noqa: PLC0415
-        gen_ai_attributes as GenAIAttributes,
-    )
-    from opentelemetry.util.genai.extended_handler import (  # noqa: PLC0415
-        ExtendedTelemetryHandler,
-    )
-
-    handler = ExtendedTelemetryHandler(tracer_provider=tracer_provider)
-    expected_spans = test_case.get("expected_spans", [])
-
-    if not expected_spans:
-        pytest.skip(f"测试用例 '{test_case['description']}' 没有定义 expected_spans")
-
-    mock_stream = create_mock_stream_from_messages(test_case["messages"])
-    
-    async for _ in _process_agent_invocation_stream(
-        wrapped_stream=mock_stream,
-        handler=handler,
-        model="qwen-plus",
-        prompt=test_case["prompt"],
-    ):
-        pass
-
-    spans = span_exporter.get_finished_spans()
-    
-    # 构建父 span 映射（按 operation 名称）
-    parent_map = {}
-    for span in spans:
-        attrs = dict(span.attributes or {})
-        if GenAIAttributes.GEN_AI_OPERATION_NAME in attrs:
-            operation = attrs[GenAIAttributes.GEN_AI_OPERATION_NAME]
-            parent_map[operation] = span
-    
-    # 验证 spans 数量
-    assert len(spans) == len(expected_spans), (
-        f"生成的 spans 数量不匹配: "
-        f"期望 {len(expected_spans)} 个，实际 {len(spans)} 个"
-    )
-    
-    # 按 operation 类型分组 spans
-    spans_by_operation = {}
-    for span in spans:
-        attrs = dict(span.attributes or {})
-        if GenAIAttributes.GEN_AI_OPERATION_NAME in attrs:
-            operation = attrs[GenAIAttributes.GEN_AI_OPERATION_NAME]
-            if operation not in spans_by_operation:
-                spans_by_operation[operation] = []
-            spans_by_operation[operation].append(span)
-    
-    # 验证每个期望的 span
-    operation_index_map = {}  # 跟踪每个 operation 类型的索引
-    for i, expected_span_def in enumerate(expected_spans):
-        expected_operation = expected_span_def.get("operation")
-        
-        if expected_operation not in spans_by_operation:
-            pytest.fail(
-                f"期望的 span #{i+1} (operation={expected_operation}) 不存在于生成的 spans 中"
-            )
-        
-        # 获取对应 operation 类型的 span（按顺序）
-        if expected_operation not in operation_index_map:
-            operation_index_map[expected_operation] = 0
-        
-        operation_index = operation_index_map[expected_operation]
-        if operation_index >= len(spans_by_operation[expected_operation]):
-            pytest.fail(
-                f"期望的 span #{i+1} (operation={expected_operation}) "
-                f"超出了该类型的实际数量 ({len(spans_by_operation[expected_operation])})"
-            )
-        
-        actual_span = spans_by_operation[expected_operation][operation_index]
-        operation_index_map[expected_operation] += 1
-        
-        # 匹配 span
-        is_match, error_msg = match_span_to_expected(actual_span, expected_span_def, parent_map)
-        assert is_match, (
-            f"Span #{i+1} (operation={expected_operation}) 不匹配:\n"
-            f"  {error_msg}\n"
-            f"  Span 名称: {actual_span.name}\n"
-            f"  Span 属性: {dict(actual_span.attributes or {})}"
-        )
-    
-    print(f"\n✅ 测试用例 '{test_case['description']}' 的所有 spans 验证通过")
-    print(f"  - 总 spans 数: {len(spans)}")
-    for operation, span_list in spans_by_operation.items():
-        print(f"  - {operation} spans: {len(span_list)} 个")
diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/test_span_validation.py b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/test_span_validation.py
index cb08b4c0e..524edfe1b 100644
--- a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/test_span_validation.py
+++ b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/test_span_validation.py
@@ -7,20 +7,20 @@
 - Span hierarchy and timeline
 """
 
-import pytest
+import json
 from pathlib import Path
-from typing import Any, Dict, List
+from typing import Any, AsyncIterator, Dict, List
+from unittest.mock import MagicMock
+
+import pytest
 import yaml
 
 from opentelemetry.sdk.trace import TracerProvider
-from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter
 from opentelemetry.sdk.trace.export import SimpleSpanProcessor
-
-from test_message_flow_cases import (
-    create_mock_stream_from_messages,
+from opentelemetry.sdk.trace.export.in_memory_span_exporter import (
+    InMemorySpanExporter,
 )
 
-
 # ============================================================================
 # Helper Functions
 # ============================================================================
@@ -29,15 +29,88 @@
 def load_cassette(filename: str) -> Dict[str, Any]:
     """Load a test case from cassettes directory."""
     cassette_path = Path(__file__).parent / "cassettes" / filename
-    with open(cassette_path, 'r', encoding='utf-8') as f:
+    with open(cassette_path, "r", encoding="utf-8") as f:
         return yaml.safe_load(f)
 
 
+def create_mock_message_from_data(message_data: Dict[str, Any]) -> Any:
+    """Create a mock message object from test data dictionary."""
+    mock_msg = MagicMock()
+    msg_type = message_data["type"]
+
+    mock_msg.__class__.__name__ = msg_type
+
+    if msg_type == "SystemMessage":
+        mock_msg.subtype = message_data["subtype"]
+        mock_msg.data = message_data["data"]
+
+    elif msg_type == "AssistantMessage":
+        mock_msg.model = message_data["model"]
+        mock_msg.content = []
+
+        for block_data in message_data["content"]:
+            mock_block = MagicMock()
+            block_type = block_data["type"]
+            mock_block.__class__.__name__ = block_type
+
+            if block_type == "TextBlock":
+                mock_block.text = block_data["text"]
+            elif block_type == "ToolUseBlock":
+                mock_block.id = block_data["id"]
+                mock_block.name = block_data["name"]
+                mock_block.input = block_data["input"]
+
+            mock_msg.content.append(mock_block)
+
+        mock_msg.parent_tool_use_id = message_data.get("parent_tool_use_id")
+        mock_msg.error = message_data.get("error")
+
+    elif msg_type == "UserMessage":
+        mock_msg.content = []
+
+        for block_data in message_data["content"]:
+            mock_block = MagicMock()
+            mock_block.__class__.__name__ = block_data["type"]
+
+            if block_data["type"] == "ToolResultBlock":
+                mock_block.tool_use_id = block_data["tool_use_id"]
+                mock_block.content = block_data["content"]
+                mock_block.is_error = block_data["is_error"]
+
+            mock_msg.content.append(mock_block)
+
+        mock_msg.uuid = message_data.get("uuid")
+        mock_msg.parent_tool_use_id = message_data.get("parent_tool_use_id")
+
+    elif msg_type == "ResultMessage":
+        mock_msg.subtype = message_data["subtype"]
+        mock_msg.duration_ms = message_data["duration_ms"]
+        mock_msg.duration_api_ms = message_data.get("duration_api_ms")
+        mock_msg.is_error = message_data["is_error"]
+        mock_msg.num_turns = message_data["num_turns"]
+        mock_msg.session_id = message_data.get("session_id")
+        mock_msg.total_cost_usd = message_data["total_cost_usd"]
+        mock_msg.usage = message_data["usage"]
+        mock_msg.result = message_data["result"]
+        mock_msg.structured_output = message_data.get("structured_output")
+
+    return mock_msg
+
+
+async def create_mock_stream_from_messages(
+    messages: List[Dict[str, Any]],
+) -> AsyncIterator[Any]:
+    """Create a mock async stream of messages."""
+    for message_data in messages:
+        yield create_mock_message_from_data(message_data)
+
+
 def find_agent_span(spans):
     """Find the Agent span."""
     from opentelemetry.semconv._incubating.attributes import (  # noqa: PLC0415
         gen_ai_attributes as GenAIAttributes,
     )
+
     for span in spans:
         attrs = dict(span.attributes or {})
         if attrs.get(GenAIAttributes.GEN_AI_OPERATION_NAME) == "invoke_agent":
@@ -50,9 +123,12 @@ def find_llm_spans(spans):
     from opentelemetry.semconv._incubating.attributes import (  # noqa: PLC0415
         gen_ai_attributes as GenAIAttributes,
     )
+
     return [
-        s for s in spans
-        if dict(s.attributes or {}).get(GenAIAttributes.GEN_AI_OPERATION_NAME) == "chat"
+        s
+        for s in spans
+        if dict(s.attributes or {}).get(GenAIAttributes.GEN_AI_OPERATION_NAME)
+        == "chat"
     ]
 
 
@@ -61,9 +137,12 @@ def find_tool_spans(spans):
     from opentelemetry.semconv._incubating.attributes import (  # noqa: PLC0415
         gen_ai_attributes as GenAIAttributes,
     )
+
     return [
-        s for s in spans
-        if dict(s.attributes or {}).get(GenAIAttributes.GEN_AI_OPERATION_NAME) == "execute_tool"
+        s
+        for s in spans
+        if dict(s.attributes or {}).get(GenAIAttributes.GEN_AI_OPERATION_NAME)
+        == "execute_tool"
     ]
 
 
@@ -92,7 +171,7 @@ def instrument(tracer_provider):
     from opentelemetry.instrumentation.claude_agent_sdk import (  # noqa: PLC0415
         ClaudeAgentSDKInstrumentor,
     )
-    
+
     instrumentor = ClaudeAgentSDKInstrumentor()
     instrumentor.instrument(tracer_provider=tracer_provider)
     yield instrumentor
@@ -105,21 +184,24 @@ def instrument(tracer_provider):
 
 
 @pytest.mark.asyncio
-@pytest.mark.parametrize("cassette_file", [
-    "test_foo_sh_command.yaml",
-    "test_echo_command.yaml",
-    "test_pretooluse_hook.yaml",
-])
+@pytest.mark.parametrize(
+    "cassette_file",
+    [
+        "test_foo_sh_command.yaml",
+        "test_echo_command.yaml",
+        "test_pretooluse_hook.yaml",
+    ],
+)
 async def test_agent_span_correctness(
     cassette_file, instrument, span_exporter, tracer_provider
 ):
-    """验证 Agent span 的正确性。
-    
-    验证内容：
-    1. Agent span 存在且唯一
-    2. Agent span 是根 span（没有 parent）
-    3. Agent span 包含正确的属性（operation.name, agent.name 等）
-    4. Agent span 包含 token 使用统计
+    """Verify Agent span correctness.
+
+    Validates:
+    1. Agent span exists and is unique
+    2. Agent span is a root span (no parent)
+    3. Agent span contains correct attributes (operation.name, agent.name, etc.)
+    4. Agent span includes token usage statistics
     """
     from opentelemetry.instrumentation.claude_agent_sdk.patch import (  # noqa: PLC0415
         _process_agent_invocation_stream,
@@ -130,11 +212,11 @@ async def test_agent_span_correctness(
     from opentelemetry.util.genai.extended_handler import (  # noqa: PLC0415
         ExtendedTelemetryHandler,
     )
-    
+
     test_case = load_cassette(cassette_file)
     handler = ExtendedTelemetryHandler(tracer_provider=tracer_provider)
     mock_stream = create_mock_stream_from_messages(test_case["messages"])
-    
+
     async for _ in _process_agent_invocation_stream(
         wrapped_stream=mock_stream,
         handler=handler,
@@ -142,33 +224,47 @@ async def test_agent_span_correctness(
         prompt=test_case["prompt"],
     ):
         pass
-    
+
     spans = span_exporter.get_finished_spans()
     agent_span = find_agent_span(spans)
-    
-    # 验证 Agent span 存在且唯一
+
+    # Verify Agent span exists and is unique
     agent_spans = [
-        s for s in spans
-        if dict(s.attributes or {}).get(GenAIAttributes.GEN_AI_OPERATION_NAME) == "invoke_agent"
+        s
+        for s in spans
+        if dict(s.attributes or {}).get(GenAIAttributes.GEN_AI_OPERATION_NAME)
+        == "invoke_agent"
     ]
-    assert len(agent_spans) == 1, f"应该有且仅有一个 Agent span，实际: {len(agent_spans)}"
-    
-    # 验证是根 span
-    assert agent_span.parent is None, "Agent span 应该是根 span，没有 parent"
-    
-    # 验证必需属性
+    assert len(agent_spans) == 1, (
+        f"Should have exactly one Agent span, got: {len(agent_spans)}"
+    )
+
+    # Verify it's a root span
+    assert agent_span.parent is None, (
+        "Agent span should be a root span with no parent"
+    )
+
+    # Verify required attributes
     attrs = dict(agent_span.attributes or {})
     assert GenAIAttributes.GEN_AI_OPERATION_NAME in attrs
     assert attrs[GenAIAttributes.GEN_AI_OPERATION_NAME] == "invoke_agent"
-    
-    # 验证包含 token 使用统计
-    assert GenAIAttributes.GEN_AI_USAGE_INPUT_TOKENS in attrs, "应该有 input_tokens"
-    assert GenAIAttributes.GEN_AI_USAGE_OUTPUT_TOKENS in attrs, "应该有 output_tokens"
-    
-    print(f"\n✅ Agent span 验证通过 ({cassette_file})")
-    print(f"  - Span 名称: {agent_span.name}")
-    print(f"  - Input tokens: {attrs.get(GenAIAttributes.GEN_AI_USAGE_INPUT_TOKENS)}")
-    print(f"  - Output tokens: {attrs.get(GenAIAttributes.GEN_AI_USAGE_OUTPUT_TOKENS)}")
+
+    # Verify token usage statistics
+    assert GenAIAttributes.GEN_AI_USAGE_INPUT_TOKENS in attrs, (
+        "Should have input_tokens"
+    )
+    assert GenAIAttributes.GEN_AI_USAGE_OUTPUT_TOKENS in attrs, (
+        "Should have output_tokens"
+    )
+
+    print(f"\n✅ Agent span validation passed ({cassette_file})")
+    print(f"  - Span name: {agent_span.name}")
+    print(
+        f"  - Input tokens: {attrs.get(GenAIAttributes.GEN_AI_USAGE_INPUT_TOKENS)}"
+    )
+    print(
+        f"  - Output tokens: {attrs.get(GenAIAttributes.GEN_AI_USAGE_OUTPUT_TOKENS)}"
+    )
 
 
 # ============================================================================
@@ -177,21 +273,24 @@ async def test_agent_span_correctness(
 
 
 @pytest.mark.asyncio
-@pytest.mark.parametrize("cassette_file", [
-    "test_foo_sh_command.yaml",
-    "test_echo_command.yaml",
-    "test_pretooluse_hook.yaml",
-])
+@pytest.mark.parametrize(
+    "cassette_file",
+    [
+        "test_foo_sh_command.yaml",
+        "test_echo_command.yaml",
+        "test_pretooluse_hook.yaml",
+    ],
+)
 async def test_llm_span_correctness(
     cassette_file, instrument, span_exporter, tracer_provider
 ):
-    """验证 LLM span 的正确性。
-    
-    验证内容：
-    1. LLM span 存在且数量正确
-    2. LLM span 是 Agent span 的子 span
-    3. LLM span 的属性正确（model, provider, operation 等）
-    4. LLM span 的 output.messages 中 tool_call.id 唯一（无重复）
+    """Verify LLM span correctness.
+
+    Validates:
+    1. LLM spans exist with correct count
+    2. LLM spans are children of Agent span
+    3. LLM span attributes are correct (model, provider, operation, etc.)
+    4. LLM span output.messages have unique tool_call.id (no duplicates)
     """
     from opentelemetry.instrumentation.claude_agent_sdk.patch import (  # noqa: PLC0415
         _process_agent_invocation_stream,
@@ -202,11 +301,11 @@ async def test_llm_span_correctness(
     from opentelemetry.util.genai.extended_handler import (  # noqa: PLC0415
         ExtendedTelemetryHandler,
     )
-    
+
     test_case = load_cassette(cassette_file)
     handler = ExtendedTelemetryHandler(tracer_provider=tracer_provider)
     mock_stream = create_mock_stream_from_messages(test_case["messages"])
-    
+
     async for _ in _process_agent_invocation_stream(
         wrapped_stream=mock_stream,
         handler=handler,
@@ -214,51 +313,56 @@ async def test_llm_span_correctness(
         prompt=test_case["prompt"],
     ):
         pass
-    
+
     spans = span_exporter.get_finished_spans()
     agent_span = find_agent_span(spans)
     llm_spans = find_llm_spans(spans)
-    
-    # 验证 LLM span 存在
-    assert len(llm_spans) > 0, "应该有至少一个 LLM span"
-    
-    # 验证所有 LLM span 是 Agent span 的子 span
+
+    # Verify LLM spans exist
+    assert len(llm_spans) > 0, "Should have at least one LLM span"
+
+    # Verify all LLM spans are children of Agent span
     for llm_span in llm_spans:
-        assert llm_span.parent is not None, "LLM span 应该有 parent"
+        assert llm_span.parent is not None, "LLM span should have a parent"
         assert llm_span.parent.span_id == agent_span.context.span_id, (
-            "LLM span 的 parent 应该是 Agent span"
+            "LLM span's parent should be Agent span"
         )
-        
-        # 验证基本属性
+
+        # Verify basic attributes
         attrs = dict(llm_span.attributes or {})
         assert attrs.get(GenAIAttributes.GEN_AI_OPERATION_NAME) == "chat"
         assert GenAIAttributes.GEN_AI_REQUEST_MODEL in attrs
-        
-        # 验证 output.messages 中 tool_call.id 唯一性
+
+        # Verify uniqueness of tool_call.id in output.messages
         if GenAIAttributes.GEN_AI_OUTPUT_MESSAGES in attrs:
-            import json
             output_messages_raw = attrs[GenAIAttributes.GEN_AI_OUTPUT_MESSAGES]
             if isinstance(output_messages_raw, str):
                 output_messages = json.loads(output_messages_raw)
             else:
                 output_messages = output_messages_raw
-            
+
             if isinstance(output_messages, list):
                 tool_call_ids = []
                 for msg in output_messages:
-                    if isinstance(msg, dict) and msg.get("role") == "assistant":
+                    if (
+                        isinstance(msg, dict)
+                        and msg.get("role") == "assistant"
+                    ):
                         parts = msg.get("parts", [])
                         for part in parts:
-                            if isinstance(part, dict) and part.get("type") == "tool_call":
+                            if (
+                                isinstance(part, dict)
+                                and part.get("type") == "tool_call"
+                            ):
                                 tool_call_id = part.get("id")
                                 if tool_call_id:
                                     assert tool_call_id not in tool_call_ids, (
-                                        f"发现重复的 tool_call ID: {tool_call_id}"
+                                        f"Found duplicate tool_call ID: {tool_call_id}"
                                     )
                                     tool_call_ids.append(tool_call_id)
-    
-    print(f"\n✅ LLM span 验证通过 ({cassette_file})")
-    print(f"  - LLM span 数量: {len(llm_spans)}")
+
+    print(f"\n✅ LLM span validation passed ({cassette_file})")
+    print(f"  - LLM span count: {len(llm_spans)}")
 
 
 # ============================================================================
@@ -267,21 +371,24 @@ async def test_llm_span_correctness(
 
 
 @pytest.mark.asyncio
-@pytest.mark.parametrize("cassette_file", [
-    "test_foo_sh_command.yaml",
-    "test_echo_command.yaml",
-    "test_pretooluse_hook.yaml",
-])
+@pytest.mark.parametrize(
+    "cassette_file",
+    [
+        "test_foo_sh_command.yaml",
+        "test_echo_command.yaml",
+        "test_pretooluse_hook.yaml",
+    ],
+)
 async def test_tool_span_correctness(
     cassette_file, instrument, span_exporter, tracer_provider
 ):
-    """验证 Tool span 的正确性。
-    
-    验证内容：
-    1. Tool span 存在且数量正确
-    2. Tool span 是 Agent span 的子 span（不是 LLM span）
-    3. Tool span 的属性正确（tool.name, tool.call.id, arguments, result 等）
-    4. Tool span 包含正确的 is_error 状态
+    """Verify Tool span correctness.
+
+    Validates:
+    1. Tool spans exist with correct count
+    2. Tool spans are children of Agent span (not LLM span)
+    3. Tool span attributes are correct (tool.name, tool.call.id, arguments, result, etc.)
+    4. Tool span contains correct is_error status
     """
     from opentelemetry.instrumentation.claude_agent_sdk.patch import (  # noqa: PLC0415
         _process_agent_invocation_stream,
@@ -292,11 +399,11 @@ async def test_tool_span_correctness(
     from opentelemetry.util.genai.extended_handler import (  # noqa: PLC0415
         ExtendedTelemetryHandler,
     )
-    
+
     test_case = load_cassette(cassette_file)
     handler = ExtendedTelemetryHandler(tracer_provider=tracer_provider)
     mock_stream = create_mock_stream_from_messages(test_case["messages"])
-    
+
     async for _ in _process_agent_invocation_stream(
         wrapped_stream=mock_stream,
         handler=handler,
@@ -304,36 +411,42 @@ async def test_tool_span_correctness(
         prompt=test_case["prompt"],
     ):
         pass
-    
+
     spans = span_exporter.get_finished_spans()
     agent_span = find_agent_span(spans)
     llm_spans = find_llm_spans(spans)
     tool_spans = find_tool_spans(spans)
-    
-    # 验证 Tool span 存在
-    assert len(tool_spans) > 0, "应该有至少一个 Tool span"
-    
-    # 验证所有 Tool span 是 Agent span 的子 span（不是 LLM span）
+
+    # Verify Tool spans exist
+    assert len(tool_spans) > 0, "Should have at least one Tool span"
+
+    # Verify all Tool spans are children of Agent span (not LLM span)
     for tool_span in tool_spans:
-        assert tool_span.parent is not None, "Tool span 应该有 parent"
+        assert tool_span.parent is not None, "Tool span should have a parent"
         assert tool_span.parent.span_id == agent_span.context.span_id, (
-            "Tool span 的 parent 应该是 Agent span，不是 LLM span"
+            "Tool span's parent should be Agent span, not LLM span"
         )
-        
-        # 确保不是 LLM span 的子 span
+
+        # Ensure it's not a child of LLM span
         for llm_span in llm_spans:
             assert tool_span.parent.span_id != llm_span.context.span_id, (
-                "Tool span 不应该是 LLM span 的子 span"
+                "Tool span should not be a child of LLM span"
             )
-        
-        # 验证基本属性
+
+        # Verify basic attributes
         attrs = dict(tool_span.attributes or {})
-        assert attrs.get(GenAIAttributes.GEN_AI_OPERATION_NAME) == "execute_tool"
-        assert GenAIAttributes.GEN_AI_TOOL_NAME in attrs, "应该有 tool.name"
-        assert GenAIAttributes.GEN_AI_TOOL_CALL_ID in attrs, "应该有 tool.call.id"
-    
-    print(f"\n✅ Tool span 验证通过 ({cassette_file})")
-    print(f"  - Tool span 数量: {len(tool_spans)}")
+        assert (
+            attrs.get(GenAIAttributes.GEN_AI_OPERATION_NAME) == "execute_tool"
+        )
+        assert GenAIAttributes.GEN_AI_TOOL_NAME in attrs, (
+            "Should have tool.name"
+        )
+        assert GenAIAttributes.GEN_AI_TOOL_CALL_ID in attrs, (
+            "Should have tool.call.id"
+        )
+
+    print(f"\n✅ Tool span validation passed ({cassette_file})")
+    print(f"  - Tool span count: {len(tool_spans)}")
 
 
 # ============================================================================
@@ -342,21 +455,24 @@ async def test_tool_span_correctness(
 
 
 @pytest.mark.asyncio
-@pytest.mark.parametrize("cassette_file", [
-    "test_foo_sh_command.yaml",
-    "test_echo_command.yaml",
-    "test_pretooluse_hook.yaml",
-])
+@pytest.mark.parametrize(
+    "cassette_file",
+    [
+        "test_foo_sh_command.yaml",
+        "test_echo_command.yaml",
+        "test_pretooluse_hook.yaml",
+    ],
+)
 async def test_span_hierarchy_correctness(
     cassette_file, instrument, span_exporter, tracer_provider
 ):
-    """验证 Span 层次结构的正确性。
-    
-    验证内容：
-    1. Agent span 是根 span
-    2. LLM span 是 Agent span 的子 span
-    3. Tool span 是 Agent span 的子 span（不是 LLM span）
-    4. Span 的时间线是串行的（LLM → Tool → LLM）
+    """Verify span hierarchy correctness.
+
+    Validates:
+    1. Agent span is the root span
+    2. LLM spans are children of Agent span
+    3. Tool spans are children of Agent span (not LLM span)
+    4. Span timeline is sequential (LLM → Tool → LLM)
     """
     from opentelemetry.instrumentation.claude_agent_sdk.patch import (  # noqa: PLC0415
         _process_agent_invocation_stream,
@@ -364,11 +480,11 @@ async def test_span_hierarchy_correctness(
     from opentelemetry.util.genai.extended_handler import (  # noqa: PLC0415
         ExtendedTelemetryHandler,
     )
-    
+
     test_case = load_cassette(cassette_file)
     handler = ExtendedTelemetryHandler(tracer_provider=tracer_provider)
     mock_stream = create_mock_stream_from_messages(test_case["messages"])
-    
+
     async for _ in _process_agent_invocation_stream(
         wrapped_stream=mock_stream,
         handler=handler,
@@ -376,39 +492,39 @@ async def test_span_hierarchy_correctness(
         prompt=test_case["prompt"],
     ):
         pass
-    
+
     spans = span_exporter.get_finished_spans()
     agent_span = find_agent_span(spans)
     llm_spans = find_llm_spans(spans)
     tool_spans = find_tool_spans(spans)
-    
-    # 验证 Agent span 是根 span
-    assert agent_span is not None, "应该有 Agent span"
-    assert agent_span.parent is None, "Agent span 应该是根 span"
-    
-    # 验证 LLM span 是 Agent span 的子 span
-    assert len(llm_spans) > 0, "应该有至少一个 LLM span"
+
+    # Verify Agent span is root
+    assert agent_span is not None, "Should have Agent span"
+    assert agent_span.parent is None, "Agent span should be root span"
+
+    # Verify LLM spans are children of Agent span
+    assert len(llm_spans) > 0, "Should have at least one LLM span"
     for llm_span in llm_spans:
-        assert llm_span.parent is not None, "LLM span 应该有 parent"
+        assert llm_span.parent is not None, "LLM span should have a parent"
         assert llm_span.parent.span_id == agent_span.context.span_id, (
-            "LLM span 的 parent 应该是 Agent span"
+            "LLM span's parent should be Agent span"
         )
-    
-    # 验证 Tool span 是 Agent span 的子 span
-    assert len(tool_spans) > 0, "应该有至少一个 Tool span"
+
+    # Verify Tool spans are children of Agent span
+    assert len(tool_spans) > 0, "Should have at least one Tool span"
     for tool_span in tool_spans:
-        assert tool_span.parent is not None, "Tool span 应该有 parent"
+        assert tool_span.parent is not None, "Tool span should have a parent"
         assert tool_span.parent.span_id == agent_span.context.span_id, (
-            "Tool span 的 parent 应该是 Agent span"
+            "Tool span's parent should be Agent span"
         )
-        
-        # 确保不是 LLM span 的子 span
+
+        # Ensure it's not a child of LLM span
         for llm_span in llm_spans:
             assert tool_span.parent.span_id != llm_span.context.span_id, (
-                "Tool span 不应该是 LLM span 的子 span"
+                "Tool span should not be a child of LLM span"
             )
-    
-    print(f"\n✅ Span 层次结构验证通过 ({cassette_file})")
-    print(f"  - Agent span: {agent_span.name} (根 span)")
-    print(f"  - LLM spans: {len(llm_spans)} 个（Agent 的子 span）")
-    print(f"  - Tool spans: {len(tool_spans)} 个（Agent 的子 span）")
+
+    print(f"\n✅ Span hierarchy validation passed ({cassette_file})")
+    print(f"  - Agent span: {agent_span.name} (root span)")
+    print(f"  - LLM spans: {len(llm_spans)} (children of Agent)")
+    print(f"  - Tool spans: {len(tool_spans)} (children of Agent)")
diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/test_task_tool.py b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/test_task_tool.py
deleted file mode 100644
index e501357a3..000000000
--- a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/test_task_tool.py
+++ /dev/null
@@ -1,247 +0,0 @@
-"""Test Task tool specific behavior: span hierarchy and message filtering."""
-
-import json
-from pathlib import Path
-from typing import Any, AsyncIterator, Dict, List
-from unittest.mock import MagicMock
-
-import pytest
-import yaml
-
-
-def load_cassette(filename: str):
-    """Load cassette file from tests/cassettes directory."""
-    cassette_path = Path(__file__).parent / "cassettes" / filename
-    with open(cassette_path, "r", encoding="utf-8") as f:
-        return yaml.safe_load(f)
-
-
-def create_mock_message_from_data(message_data: Dict[str, Any]) -> Any:
-    """Create a mock message object from cassette data."""
-    mock_msg = MagicMock()
-    mock_msg.__class__.__name__ = message_data["type"]
-    
-    # 基本属性
-    mock_msg.parent_tool_use_id = message_data.get("parent_tool_use_id")
-    
-    if message_data["type"] == "SystemMessage":
-        mock_msg.subtype = message_data.get("subtype")
-        if "data" in message_data:
-            for key, value in message_data["data"].items():
-                setattr(mock_msg, key, value)
-    
-    elif message_data["type"] == "AssistantMessage":
-        mock_msg.model = message_data.get("model")
-        mock_msg.error = message_data.get("error")
-        mock_msg.content = []
-        
-        if "content" in message_data:
-            for block in message_data["content"]:
-                mock_block = MagicMock()
-                mock_block.__class__.__name__ = block["type"]
-                
-                if block["type"] == "TextBlock":
-                    mock_block.text = block.get("text", "")
-                elif block["type"] == "ToolUseBlock":
-                    mock_block.id = block.get("id")
-                    mock_block.name = block.get("name")
-                    mock_block.input = block.get("input", {})
-                
-                mock_msg.content.append(mock_block)
-    
-    elif message_data["type"] == "UserMessage":
-        mock_msg.uuid = message_data.get("uuid")
-        mock_msg.content = []
-        
-        if "content" in message_data:
-            for block in message_data["content"]:
-                mock_block = MagicMock()
-                mock_block.__class__.__name__ = block["type"]
-                
-                if block["type"] == "ToolResultBlock":
-                    mock_block.tool_use_id = block.get("tool_use_id")
-                    mock_block.content = block.get("content")
-                    mock_block.is_error = block.get("is_error", False)
-                elif block["type"] == "TextBlock":
-                    mock_block.text = block.get("text", "")
-                
-                mock_msg.content.append(mock_block)
-    
-    elif message_data["type"] == "ResultMessage":
-        mock_msg.subtype = message_data.get("subtype")
-        mock_msg.duration_ms = message_data.get("duration_ms")
-        mock_msg.duration_api_ms = message_data.get("duration_api_ms")
-        mock_msg.is_error = message_data.get("is_error", False)
-        mock_msg.num_turns = message_data.get("num_turns")
-        mock_msg.session_id = message_data.get("session_id")
-        mock_msg.total_cost_usd = message_data.get("total_cost_usd")
-        mock_msg.usage = message_data.get("usage")
-        mock_msg.result = message_data.get("result")
-        mock_msg.structured_output = message_data.get("structured_output")
-    
-    return mock_msg
-
-
-async def create_mock_stream_from_messages(
-    messages: List[Dict[str, Any]]
-) -> AsyncIterator[Any]:
-    """Create mock async stream from message data."""
-    for message_data in messages:
-        yield create_mock_message_from_data(message_data)
-
-
-@pytest.mark.asyncio
-async def test_task_tool_span_hierarchy(instrument, span_exporter, tracer_provider):
-    """Test that Task tool creates proper span hierarchy with subagent spans as children."""
-    from opentelemetry.instrumentation.claude_agent_sdk.patch import (  # noqa: PLC0415
-        _process_agent_invocation_stream,
-    )
-    from opentelemetry.util.genai.extended_handler import (  # noqa: PLC0415
-        ExtendedTelemetryHandler,
-    )
-    
-    cassette = load_cassette("test_task_tool.yaml")
-    handler = ExtendedTelemetryHandler(tracer_provider=tracer_provider)
-    mock_stream = create_mock_stream_from_messages(cassette["messages"])
-    
-    async for _ in _process_agent_invocation_stream(
-        wrapped_stream=mock_stream,
-        handler=handler,
-        model="qwen-plus",
-        prompt=cassette["prompt"],
-    ):
-        pass
-    
-    spans = span_exporter.get_finished_spans()
-    
-    # Expected spans:
-    # 1. invoke_agent (root)
-    # 2. LLM₁ chat (main agent decides to use Task)
-    # 3. Task execute_tool (parent for subagent work)
-    # 4. LLM₂ chat (inside Task - child of Task span)
-    # 5. Read execute_tool (inside Task - child of Task span)
-    # 6. LLM₃ chat (inside Task - child of Task span)
-    # 7. LLM₄ chat (main agent summarizes)
-    
-    assert len(spans) >= 7, f"Expected at least 7 spans, got {len(spans)}"
-    
-    # Find spans by operation
-    agent_spans = [s for s in spans if s.attributes.get("gen_ai.operation.name") == "invoke_agent"]
-    llm_spans = [s for s in spans if s.attributes.get("gen_ai.operation.name") == "chat"]
-    tool_spans = [s for s in spans if s.attributes.get("gen_ai.operation.name") == "execute_tool"]
-    
-    assert len(agent_spans) == 1, f"Expected 1 agent span, got {len(agent_spans)}"
-    assert len(llm_spans) >= 4, f"Expected at least 4 LLM spans, got {len(llm_spans)}"
-    assert len(tool_spans) >= 2, f"Expected at least 2 tool spans (Task + Read), got {len(tool_spans)}"
-    
-    agent_span = agent_spans[0]
-    
-    # Find the Task tool span
-    task_spans = [s for s in tool_spans if s.attributes.get("gen_ai.tool.name") == "Task"]
-    assert len(task_spans) == 1, f"Expected 1 Task span, got {len(task_spans)}"
-    task_span = task_spans[0]
-    
-    # Verify Task span is child of agent span
-    assert task_span.parent is not None, "Task span should have a parent"
-    assert task_span.parent.span_id == agent_span.context.span_id, \
-        "Task span should be child of agent span"
-    
-    # Find the Read tool span (inside Task)
-    read_spans = [s for s in tool_spans if s.attributes.get("gen_ai.tool.name") == "Read"]
-    assert len(read_spans) == 1, f"Expected 1 Read span, got {len(read_spans)}"
-    read_span = read_spans[0]
-    
-    # Verify Read span is child of Task span
-    assert read_span.parent is not None, "Read span should have a parent"
-    assert read_span.parent.span_id == task_span.context.span_id, \
-        "Read span should be child of Task span (not agent span)"
-    
-    # Find LLM spans inside Task
-    # They should be children of Task span
-    task_llm_spans = [s for s in llm_spans if s.parent and s.parent.span_id == task_span.context.span_id]
-    
-    assert len(task_llm_spans) >= 2, \
-        f"Expected at least 2 LLM spans inside Task, got {len(task_llm_spans)}"
-
-
-@pytest.mark.asyncio
-async def test_task_tool_message_filtering(instrument, span_exporter, tracer_provider):
-    """Test that Task internal messages don't appear in parent LLM's input/output."""
-    from opentelemetry.instrumentation.claude_agent_sdk.patch import (  # noqa: PLC0415
-        _process_agent_invocation_stream,
-    )
-    from opentelemetry.util.genai.extended_handler import (  # noqa: PLC0415
-        ExtendedTelemetryHandler,
-    )
-    
-    cassette = load_cassette("test_task_tool.yaml")
-    handler = ExtendedTelemetryHandler(tracer_provider=tracer_provider)
-    mock_stream = create_mock_stream_from_messages(cassette["messages"])
-    
-    async for _ in _process_agent_invocation_stream(
-        wrapped_stream=mock_stream,
-        handler=handler,
-        model="qwen-plus",
-        prompt=cassette["prompt"],
-    ):
-        pass
-    
-    spans = span_exporter.get_finished_spans()
-    llm_spans = [s for s in spans if s.attributes.get("gen_ai.operation.name") == "chat"]
-    
-    # Find the last LLM span (LLM₄ - main agent summarizes after Task completes)
-    # This should be the LLM that receives the Task result
-    last_llm_span = llm_spans[-1]
-    
-    # Get input messages
-    input_messages_str = last_llm_span.attributes.get("gen_ai.input.messages")
-    assert input_messages_str is not None, "LLM span should have input.messages"
-    
-    try:
-        input_messages = json.loads(input_messages_str)
-    except (json.JSONDecodeError, TypeError):
-        input_messages = input_messages_str
-    
-    # The last LLM's input should contain:
-    # 1. User prompt
-    # 2. Assistant decision to use Task
-    # 3. Task tool_call
-    # 4. Task tool_call_response (result)
-    #
-    # It should NOT contain:
-    # - "I'll read the file first" (internal to Task)
-    # - Read tool call (internal to Task)
-    # - Read tool result (internal to Task)
-    # - "The code looks good..." (internal to Task)
-    
-    # Convert to string for easier checking
-    input_str = str(input_messages)
-    
-    # Should contain Task-level interactions
-    assert "Task" in input_str, "Should contain Task tool call"
-    assert "Code review completed" in input_str, "Should contain Task result"
-    
-    # Should NOT contain Task internal messages
-    assert "I'll read the file first" not in input_str, \
-        "Should NOT contain Task internal assistant message"
-    assert "call_read_001" not in input_str, \
-        "Should NOT contain Task internal Read tool call ID"
-    assert "class MyType" not in input_str, \
-        "Should NOT contain Task internal Read tool result"
-    assert "The code looks good" not in input_str, \
-        "Should NOT contain Task internal analysis text"
-    
-    # Get output messages
-    output_messages_str = last_llm_span.attributes.get("gen_ai.output.messages")
-    assert output_messages_str is not None, "LLM span should have output.messages"
-    
-    try:
-        output_messages = json.loads(output_messages_str)
-    except (json.JSONDecodeError, TypeError):
-        output_messages = output_messages_str
-    output_str = str(output_messages)
-    
-    # Output should be the final summary
-    assert "code-reviewer agent completed" in output_str.lower() or \
-           "analysis" in output_str.lower(), \
-        "Output should contain summary from main agent"
diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/test_with_cassettes.py b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/test_with_cassettes.py
index 771309bfd..68307d10b 100644
--- a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/test_with_cassettes.py
+++ b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/test_with_cassettes.py
@@ -1,19 +1,21 @@
 """Tests for Claude Agent SDK instrumentation using cassette-based test data.
 
-This test module uses YAML cassettes (similar to dashscope instrumentation) to test
-the _process_agent_invocation_stream function with real message sequences.
+This test module uses YAML cassettes to test the _process_agent_invocation_stream
+function with real message sequences from claude-agent-sdk-python examples.
 """
 
-import pytest
-import yaml
 from pathlib import Path
 from typing import Any, AsyncIterator, Dict, List
 from unittest.mock import MagicMock
 
+import pytest
+import yaml
+
 from opentelemetry.sdk.trace import TracerProvider
-from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter
 from opentelemetry.sdk.trace.export import SimpleSpanProcessor
-
+from opentelemetry.sdk.trace.export.in_memory_span_exporter import (
+    InMemorySpanExporter,
+)
 
 # ============================================================================
 # Cassette Loading
@@ -23,8 +25,8 @@
 def load_cassette(filename: str) -> Dict[str, Any]:
     """Load test case from cassettes directory."""
     cassette_path = Path(__file__).parent / "cassettes" / filename
-    
-    with open(cassette_path, 'r', encoding='utf-8') as f:
+
+    with open(cassette_path, "r", encoding="utf-8") as f:
         return yaml.safe_load(f)
 
 
@@ -35,7 +37,7 @@ def get_all_cassettes() -> List[str]:
 
 
 # ============================================================================
-# Helper Functions
+# Mock Message Helpers
 # ============================================================================
 
 
@@ -43,51 +45,51 @@ def create_mock_message_from_data(message_data: Dict[str, Any]) -> Any:
     """Create a mock message object from test data dictionary."""
     mock_msg = MagicMock()
     msg_type = message_data["type"]
-    
+
     mock_msg.__class__.__name__ = msg_type
-    
+
     if msg_type == "SystemMessage":
         mock_msg.subtype = message_data["subtype"]
         mock_msg.data = message_data["data"]
-        
+
     elif msg_type == "AssistantMessage":
         mock_msg.model = message_data["model"]
         mock_msg.content = []
-        
+
         for block_data in message_data["content"]:
             mock_block = MagicMock()
             block_type = block_data["type"]
             mock_block.__class__.__name__ = block_type
-            
+
             if block_type == "TextBlock":
                 mock_block.text = block_data["text"]
             elif block_type == "ToolUseBlock":
                 mock_block.id = block_data["id"]
                 mock_block.name = block_data["name"]
                 mock_block.input = block_data["input"]
-            
+
             mock_msg.content.append(mock_block)
-            
+
         mock_msg.parent_tool_use_id = message_data.get("parent_tool_use_id")
         mock_msg.error = message_data.get("error")
-        
+
     elif msg_type == "UserMessage":
         mock_msg.content = []
-        
+
         for block_data in message_data["content"]:
             mock_block = MagicMock()
             mock_block.__class__.__name__ = block_data["type"]
-            
+
             if block_data["type"] == "ToolResultBlock":
                 mock_block.tool_use_id = block_data["tool_use_id"]
                 mock_block.content = block_data["content"]
                 mock_block.is_error = block_data["is_error"]
-                
+
             mock_msg.content.append(mock_block)
-            
+
         mock_msg.uuid = message_data.get("uuid")
         mock_msg.parent_tool_use_id = message_data.get("parent_tool_use_id")
-        
+
     elif msg_type == "ResultMessage":
         mock_msg.subtype = message_data["subtype"]
         mock_msg.duration_ms = message_data["duration_ms"]
@@ -99,12 +101,12 @@ def create_mock_message_from_data(message_data: Dict[str, Any]) -> Any:
         mock_msg.usage = message_data["usage"]
         mock_msg.result = message_data["result"]
         mock_msg.structured_output = message_data.get("structured_output")
-    
+
     return mock_msg
 
 
 async def create_mock_stream_from_messages(
-    messages: List[Dict[str, Any]]
+    messages: List[Dict[str, Any]],
 ) -> AsyncIterator[Any]:
     """Create a mock async stream of messages."""
     for message_data in messages:
@@ -137,7 +139,7 @@ def instrument(tracer_provider):
     from opentelemetry.instrumentation.claude_agent_sdk import (  # noqa: PLC0415
         ClaudeAgentSDKInstrumentor,
     )
-    
+
     instrumentor = ClaudeAgentSDKInstrumentor()
     instrumentor.instrument(tracer_provider=tracer_provider)
     yield instrumentor
@@ -154,12 +156,14 @@ def instrument(tracer_provider):
 async def test_agent_invocation_with_cassette(
     cassette_file, instrument, span_exporter, tracer_provider
 ):
-    """测试使用 cassette 数据的 agent invocation。
-    
-    这个测试：
-    1. 从 cassette 文件加载真实的消息序列
-    2. 使用 _process_agent_invocation_stream 处理消息
-    3. 验证生成的 spans 数量和基本属性
+    """Test agent invocation with cassette data.
+
+    This test:
+    1. Loads real message sequences from cassette file
+    2. Processes messages using _process_agent_invocation_stream
+    3. Verifies the number and basic properties of generated spans
+
+    For detailed span validation, see test_span_validation.py
     """
     from opentelemetry.instrumentation.claude_agent_sdk.patch import (  # noqa: PLC0415
         _process_agent_invocation_stream,
@@ -170,14 +174,14 @@ async def test_agent_invocation_with_cassette(
     from opentelemetry.util.genai.extended_handler import (  # noqa: PLC0415
         ExtendedTelemetryHandler,
     )
-    
-    # 加载 cassette
+
+    # Load cassette
     test_case = load_cassette(cassette_file)
-    
+
     handler = ExtendedTelemetryHandler(tracer_provider=tracer_provider)
     mock_stream = create_mock_stream_from_messages(test_case["messages"])
-    
-    # 处理消息流
+
+    # Process message stream
     async for _ in _process_agent_invocation_stream(
         wrapped_stream=mock_stream,
         handler=handler,
@@ -185,129 +189,38 @@ async def test_agent_invocation_with_cassette(
         prompt=test_case["prompt"],
     ):
         pass
-    
-    # 验证生成的 spans
+
+    # Verify generated spans
     spans = span_exporter.get_finished_spans()
-    
-    # 基本验证
-    assert len(spans) > 0, f"应该生成至少一个 span for {cassette_file}"
-    
-    # 验证 Agent span 存在
+
+    # Basic validation
+    assert len(spans) > 0, (
+        f"Should generate at least one span for {cassette_file}"
+    )
+
+    # Verify Agent span exists
     agent_spans = [
-        s for s in spans
-        if dict(s.attributes or {}).get(GenAIAttributes.GEN_AI_OPERATION_NAME) == "invoke_agent"
+        s
+        for s in spans
+        if dict(s.attributes or {}).get(GenAIAttributes.GEN_AI_OPERATION_NAME)
+        == "invoke_agent"
     ]
-    assert len(agent_spans) == 1, f"应该有一个 Agent span for {cassette_file}"
-    
-    # 验证 LLM spans 存在
+    assert len(agent_spans) == 1, (
+        f"Should have one Agent span for {cassette_file}"
+    )
+
+    # Verify LLM spans exist
     llm_spans = [
-        s for s in spans
-        if dict(s.attributes or {}).get(GenAIAttributes.GEN_AI_OPERATION_NAME) == "chat"
+        s
+        for s in spans
+        if dict(s.attributes or {}).get(GenAIAttributes.GEN_AI_OPERATION_NAME)
+        == "chat"
     ]
-    assert len(llm_spans) > 0, f"应该有至少一个 LLM span for {cassette_file}"
-    
-    print(f"\n✅ {cassette_file}: {len(spans)} spans (Agent: 1, LLM: {len(llm_spans)})")
-
-
-@pytest.mark.asyncio
-@pytest.mark.parametrize("cassette_file", get_all_cassettes())
-async def test_spans_match_expected(
-    cassette_file, instrument, span_exporter, tracer_provider
-):
-    """验证实际生成的 spans 与 expected_spans 完全匹配。
-    
-    这个测试验证：
-    1. 生成的 spans 数量与 expected_spans 一致
-    2. 每个 span 的名称、操作类型、父 span 都匹配
-    3. 每个 span 的属性都完全匹配 expected_spans 中的定义
-    4. Span 的层次结构正确
-    """
-    from opentelemetry.instrumentation.claude_agent_sdk.patch import (  # noqa: PLC0415
-        _process_agent_invocation_stream,
-    )
-    from opentelemetry.semconv._incubating.attributes import (  # noqa: PLC0415
-        gen_ai_attributes as GenAIAttributes,
-    )
-    from opentelemetry.util.genai.extended_handler import (  # noqa: PLC0415
-        ExtendedTelemetryHandler,
+    assert len(llm_spans) > 0, (
+        f"Should have at least one LLM span for {cassette_file}"
     )
-    from test_message_flow_cases import (  # noqa: PLC0415
-        match_span_to_expected,
-    )
-    
-    # 加载 cassette
-    test_case = load_cassette(cassette_file)
-    expected_spans = test_case.get("expected_spans", [])
-    
-    if not expected_spans:
-        pytest.skip(f"{cassette_file} 没有定义 expected_spans")
-    
-    handler = ExtendedTelemetryHandler(tracer_provider=tracer_provider)
-    mock_stream = create_mock_stream_from_messages(test_case["messages"])
-    
-    async for _ in _process_agent_invocation_stream(
-        wrapped_stream=mock_stream,
-        handler=handler,
-        model="qwen-plus",
-        prompt=test_case["prompt"],
-    ):
-        pass
-    
-    spans = span_exporter.get_finished_spans()
-    
-    # 构建父 span 映射
-    parent_map = {}
-    for span in spans:
-        attrs = dict(span.attributes or {})
-        if GenAIAttributes.GEN_AI_OPERATION_NAME in attrs:
-            operation = attrs[GenAIAttributes.GEN_AI_OPERATION_NAME]
-            parent_map[operation] = span
-    
-    # 验证 spans 数量
-    assert len(spans) == len(expected_spans), (
-        f"生成的 spans 数量不匹配: "
-        f"期望 {len(expected_spans)} 个，实际 {len(spans)} 个"
+
+    print(
+        f"\n✅ {cassette_file}: {len(spans)} spans "
+        f"(Agent: 1, LLM: {len(llm_spans)})"
     )
-    
-    # 按 operation 类型分组 spans
-    spans_by_operation = {}
-    for span in spans:
-        attrs = dict(span.attributes or {})
-        if GenAIAttributes.GEN_AI_OPERATION_NAME in attrs:
-            operation = attrs[GenAIAttributes.GEN_AI_OPERATION_NAME]
-            if operation not in spans_by_operation:
-                spans_by_operation[operation] = []
-            spans_by_operation[operation].append(span)
-    
-    # 验证每个期望的 span
-    operation_index_map = {}
-    for i, expected_span_def in enumerate(expected_spans):
-        expected_operation = expected_span_def.get("operation")
-        
-        if expected_operation not in spans_by_operation:
-            pytest.fail(
-                f"期望的 span #{i+1} (operation={expected_operation}) 不存在于生成的 spans 中"
-            )
-        
-        if expected_operation not in operation_index_map:
-            operation_index_map[expected_operation] = 0
-        
-        operation_index = operation_index_map[expected_operation]
-        if operation_index >= len(spans_by_operation[expected_operation]):
-            pytest.fail(
-                f"期望的 span #{i+1} (operation={expected_operation}) "
-                f"超出了该类型的实际数量 ({len(spans_by_operation[expected_operation])})"
-            )
-        
-        actual_span = spans_by_operation[expected_operation][operation_index]
-        operation_index_map[expected_operation] += 1
-        
-        # 匹配 span
-        is_match, error_msg = match_span_to_expected(actual_span, expected_span_def, parent_map)
-        assert is_match, (
-            f"Span #{i+1} (operation={expected_operation}) 不匹配:\n"
-            f"  {error_msg}\n"
-            f"  Span 名称: {actual_span.name}"
-        )
-    
-    print(f"\n✅ {cassette_file}: 所有 {len(expected_spans)} 个 spans 验证通过")

From b305204ef81a0db57ff27543ba8700ada440db6d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E6=B5=81=E5=B1=BF?= <caishipeng.csp@alibaba-inc.com>
Date: Mon, 26 Jan 2026 16:35:56 +0800
Subject: [PATCH 19/23] [fix] fix stop reason error

Change-Id: Ieb6c0fa2f44b1b517b2998da0b58afffd4598d15
Co-developed-by: Cursor <noreply@cursor.com>
---
 .../pyproject.toml                                 |  2 +-
 .../instrumentation/claude_agent_sdk/patch.py      |  5 +++++
 .../tests/test_attributes.py                       | 14 --------------
 3 files changed, 6 insertions(+), 15 deletions(-)

diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/pyproject.toml b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/pyproject.toml
index aefdbdf04..5586bd14b 100644
--- a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/pyproject.toml
+++ b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/pyproject.toml
@@ -10,7 +10,7 @@ readme = "README.rst"
 license = "Apache-2.0"
 requires-python = ">=3.10"
 authors = [
-  { name = "LoongSuite Python Agent Authors", email = "qp467389@alibaba-inc.com" },
+  { name = "LoongSuite Python Agent Authors", email = "caishipeng.csp@alibaba-inc.com" },
 ]
 classifiers = [
   "Development Status :: 4 - Beta",
diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/instrumentation/claude_agent_sdk/patch.py b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/instrumentation/claude_agent_sdk/patch.py
index 341a97b23..e1e8c45dc 100644
--- a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/instrumentation/claude_agent_sdk/patch.py
+++ b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/instrumentation/claude_agent_sdk/patch.py
@@ -256,8 +256,13 @@ def _process_assistant_message(
                     turn_tracker.current_llm_invocation.output_messages[-1]
                 )
                 last_output_msg.parts.extend(parts)
+                last_output_msg.finish_reason = "tool_calls"
             else:
                 turn_tracker.add_assistant_output(parts)
+                output_msg = OutputMessage(
+                    role="assistant", parts=list(parts), finish_reason="tool_calls"
+                )
+                turn_tracker.current_llm_invocation.output_messages.append(output_msg)
 
         # Only add to collected_messages if not inside a Task
         if not is_inside_task:
diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/test_attributes.py b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/test_attributes.py
index 8725e3cd0..fcb88fd53 100644
--- a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/test_attributes.py
+++ b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/test_attributes.py
@@ -8,8 +8,6 @@
 from opentelemetry.instrumentation.claude_agent_sdk import (
     ClaudeAgentSDKInstrumentor,
     __version__,
-    hooks,
-    utils,
 )
 from opentelemetry.sdk.metrics import MeterProvider
 from opentelemetry.semconv._incubating.attributes import (
@@ -307,15 +305,3 @@ async def test_span_attributes_no_sensitive_data(
                 # Sensitive content should not be in attributes
                 assert "secret123" not in attr_value.lower()
 
-
-def test_hooks_are_exported():
-    """Test that hooks are exported for external use."""
-    # Check internal hooks can be imported
-    assert hasattr(hooks, "pre_tool_use_hook")
-    assert hasattr(hooks, "post_tool_use_hook")
-
-
-def test_utils_are_internal():
-    """Test that utils are properly organized."""
-    # Utils should have the helper functions
-    assert hasattr(utils, "extract_usage_metadata")

From 393bb1284e09ca751ac89d01941af80134d7d838 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E6=B5=81=E5=B1=BF?= <caishipeng.csp@alibaba-inc.com>
Date: Mon, 26 Jan 2026 19:32:01 +0800
Subject: [PATCH 20/23] [feat] add test

Change-Id: Ib3954bfc8863415404e7b2a12a9b3ee147754ff6
Co-developed-by: Cursor <noreply@cursor.com>
---
 .../instrumentation/claude_agent_sdk/patch.py | 166 +++++-
 .../cassettes/test_doc_writer_with_task.yaml  | 171 +++++++
 .../tests/test_attributes.py                  |   1 -
 .../tests/test_task_subagent_real_data.py     | 476 ++++++++++++++++++
 .../tests/test_with_cassettes.py              |  31 +-
 5 files changed, 815 insertions(+), 30 deletions(-)
 create mode 100644 instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/cassettes/test_doc_writer_with_task.yaml
 create mode 100644 instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/test_task_subagent_real_data.py

diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/instrumentation/claude_agent_sdk/patch.py b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/instrumentation/claude_agent_sdk/patch.py
index e1e8c45dc..c0b30423d 100644
--- a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/instrumentation/claude_agent_sdk/patch.py
+++ b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/instrumentation/claude_agent_sdk/patch.py
@@ -116,17 +116,19 @@ def _create_tool_spans_from_message(
 ) -> None:
     """Create tool execution spans from ToolUseBlocks in an AssistantMessage.
 
-    Tool spans are children of the active Task span (if any), otherwise agent span.
-    When a Task tool is created, it's pushed onto active_task_stack.
+    Tool spans are children of the active SubAgent span (if any), otherwise agent span.
+    When a Task tool is created, it's pushed onto active_task_stack along with a SubAgent span.
+
+    The stack structure is: [{"task": ExecuteToolInvocation, "subagent": InvokeAgentInvocation}, ...]
     """
     if not hasattr(msg, "content"):
         return
 
     exclude_tool_names = exclude_tool_names or []
 
-    # Determine parent span: use active Task span if exists, otherwise agent span
+    # Determine parent span: use active SubAgent span if exists, otherwise agent span
     parent_span = (
-        active_task_stack[-1].span
+        active_task_stack[-1]["subagent"].span
         if active_task_stack
         else agent_invocation.span
     )
@@ -162,12 +164,64 @@ def _create_tool_spans_from_message(
                 handler.start_execute_tool(tool_invocation)
                 _client_managed_runs[tool_use_id] = tool_invocation
 
-                # If this is a Task tool, push it onto the stack
+                # If this is a Task tool, create a SubAgent span under it
+                # https://platform.claude.com/docs/en/agent-sdk/python#task
                 if tool_name == "Task":
-                    active_task_stack.append(tool_invocation)
-                    logger.debug(
-                        f"Task span created and pushed: {tool_use_id}, stack depth: {len(active_task_stack)}"
-                    )
+                    # Extract subagent_type from tool input
+                    subagent_type = tool_input.get("subagent_type", "unknown")
+                    task_description = tool_input.get("description", "")
+                    task_prompt = tool_input.get("prompt", "")
+
+                    # Create SubAgent span as child of Task Tool span
+                    subagent_context_token = None
+                    if tool_invocation.span:
+                        try:
+                            subagent_context_token = otel_context.attach(
+                                set_span_in_context(tool_invocation.span)
+                            )
+                        except Exception:
+                            pass
+
+                    try:
+                        # Create input message from task prompt
+                        input_messages = []
+                        if task_prompt:
+                            input_messages.append(
+                                InputMessage(
+                                    role="user",
+                                    parts=[Text(content=task_prompt)],
+                                )
+                            )
+
+                        # Create SubAgent invocation
+                        subagent_invocation = InvokeAgentInvocation(
+                            provider=infer_provider_from_base_url(),
+                            agent_name=subagent_type,
+                            agent_description=task_description,
+                            input_messages=input_messages,
+                        )
+
+                        # Start SubAgent span
+                        handler.start_invoke_agent(subagent_invocation)
+
+                        # Push both Task and SubAgent onto stack as a dict
+                        active_task_stack.append(
+                            {
+                                "task": tool_invocation,
+                                "subagent": subagent_invocation,
+                                "tool_use_id": tool_use_id,
+                            }
+                        )
+
+                        logger.debug(
+                            f"Task span created with SubAgent '{subagent_type}': {tool_use_id}, stack depth: {len(active_task_stack)}"
+                        )
+                    finally:
+                        if subagent_context_token is not None:
+                            try:
+                                otel_context.detach(subagent_context_token)
+                            except Exception:
+                                pass
 
             except Exception as e:
                 logger.warning(
@@ -260,9 +314,13 @@ def _process_assistant_message(
             else:
                 turn_tracker.add_assistant_output(parts)
                 output_msg = OutputMessage(
-                    role="assistant", parts=list(parts), finish_reason="tool_calls"
+                    role="assistant",
+                    parts=list(parts),
+                    finish_reason="tool_calls",
+                )
+                turn_tracker.current_llm_invocation.output_messages.append(
+                    output_msg
                 )
-                turn_tracker.current_llm_invocation.output_messages.append(output_msg)
 
         # Only add to collected_messages if not inside a Task
         if not is_inside_task:
@@ -318,11 +376,76 @@ def _process_user_message(
 
                     # Set tool response
                     tool_content = getattr(block, "content", None)
+                    tool_use_result = getattr(block, "tool_use_result", None)
                     is_error_value = getattr(block, "is_error", None)
                     is_error = is_error_value is True
 
                     tool_invocation.tool_call_result = tool_content
 
+                    # Check if this is a Task tool result - if so, close SubAgent FIRST
+                    # BEFORE closing the Task tool span
+                    # https://platform.claude.com/docs/en/agent-sdk/python#task
+                    is_task_result = (
+                        active_task_stack
+                        and active_task_stack[-1]["tool_use_id"] == tool_use_id
+                    )
+                    if is_task_result:
+                        task_entry = active_task_stack.pop()
+
+                        # Extract information from tool_use_result (official Task tool output format)
+                        # Output format: {"result": str, "usage": dict | None, "total_cost_usd": float | None, "duration_ms": int | None}
+                        if tool_use_result and isinstance(
+                            tool_use_result, dict
+                        ):
+                            # Extract result (str) for output_messages
+                            result_text = tool_use_result.get("result")
+                            if result_text and isinstance(result_text, str):
+                                task_entry["subagent"].output_messages.append(
+                                    OutputMessage(
+                                        role="assistant",
+                                        parts=[Text(content=result_text)],
+                                        finish_reason="stop",
+                                    )
+                                )
+
+                            # Extract usage from tool_use_result
+                            # Always record usage info from official SDK, even if values are 0
+                            usage = tool_use_result.get("usage")
+                            if usage and isinstance(usage, dict):
+                                if "input_tokens" in usage:
+                                    task_entry[
+                                        "subagent"
+                                    ].input_tokens = usage["input_tokens"]
+                                if "output_tokens" in usage:
+                                    task_entry[
+                                        "subagent"
+                                    ].output_tokens = usage["output_tokens"]
+
+                            # Extract additional attributes
+                            if "total_cost_usd" in tool_use_result:
+                                task_entry["subagent"].attributes[
+                                    "total_cost_usd"
+                                ] = tool_use_result["total_cost_usd"]
+                            if "duration_ms" in tool_use_result:
+                                task_entry["subagent"].attributes[
+                                    "duration_ms"
+                                ] = tool_use_result["duration_ms"]
+
+                        # Close SubAgent span first (detach SubAgent context)
+                        # This restores context to Task Tool span level
+                        try:
+                            handler.stop_invoke_agent(task_entry["subagent"])
+                        except Exception as e:
+                            logger.warning(
+                                f"Failed to close SubAgent span: {e}"
+                            )
+
+                        logger.debug(
+                            f"Task span closed: {tool_use_id}, stack depth: {len(active_task_stack)}"
+                        )
+
+                    # Now close the tool span (Task or regular tool)
+                    # For Task: this detaches Task Tool context, restoring to Agent context
                     if is_error:
                         error_msg = (
                             str(tool_content)
@@ -336,18 +459,6 @@ def _process_user_message(
                     else:
                         handler.stop_execute_tool(tool_invocation)
 
-                    # Check if this is a Task tool result - if so, pop from stack
-                    # BEFORE we check is_inside_task for message filtering
-                    is_task_result = (
-                        active_task_stack
-                        and active_task_stack[-1].tool_call_id == tool_use_id
-                    )
-                    if is_task_result:
-                        active_task_stack.pop()
-                        logger.debug(
-                            f"Task span closed and popped: {tool_use_id}, stack depth: {len(active_task_stack)}"
-                        )
-
                 if tool_use_id:
                     tool_parts.append(
                         ToolCallResponse(
@@ -493,10 +604,15 @@ async def _process_agent_invocation_stream(
 
         # Clean up any remaining Task spans in stack (shouldn't happen in normal flow)
         while active_task_stack:
-            task_invocation = active_task_stack.pop()
+            task_entry = active_task_stack.pop()
             logger.warning(
-                f"Unclosed Task span at end of invocation: {task_invocation.tool_call_id}"
+                f"Unclosed Task span at end of invocation: {task_entry['tool_use_id']}"
             )
+            # Close SubAgent span if it exists
+            try:
+                handler.stop_invoke_agent(task_entry["subagent"])
+            except Exception:
+                pass
 
         # Detach empty context token to restore the original context.
         # Note: stop_invoke_agent/fail_invoke_agent already detached invocation.context_token,
diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/cassettes/test_doc_writer_with_task.yaml b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/cassettes/test_doc_writer_with_task.yaml
new file mode 100644
index 000000000..85a254327
--- /dev/null
+++ b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/cassettes/test_doc_writer_with_task.yaml
@@ -0,0 +1,171 @@
+description: 'Documentation Writer Agent using Task tool to invoke general-purpose subagent'
+prompt: 'Use the doc-writer agent to explain what AgentDefinition is used for'
+messages:
+- type: SystemMessage
+  subtype: init
+  data:
+    type: system
+    subtype: init
+    cwd: /Users/sipercai/project/pyins/extend/claude-agent-sdk-python
+    session_id: 397137cf-3ade-4272-a0bf-aecf66956d1b
+    tools:
+    - Task
+    - TaskOutput
+    - Bash
+    - Glob
+    - Grep
+    - ExitPlanMode
+    - Read
+    - Edit
+    - Write
+    - NotebookEdit
+    - WebFetch
+    - TodoWrite
+    - WebSearch
+    - TaskStop
+    - AskUserQuestion
+    - Skill
+    - EnterPlanMode
+    - ToolSearch
+    mcp_servers: []
+    model: qwen-plus
+    permissionMode: default
+    slash_commands:
+    - compact
+    - context
+    - cost
+    - init
+    - pr-comments
+    - release-notes
+    - review
+    - security-review
+    apiKeySource: ANTHROPIC_API_KEY
+    claude_code_version: 2.1.19
+    output_style: default
+    agents:
+    - Bash
+    - general-purpose
+    - statusline-setup
+    - Explore
+    - Plan
+    skills: []
+    plugins: []
+    uuid: 49c8bfe4-abe7-4f4b-bc19-b0dc334defc2
+
+- type: AssistantMessage
+  model: qwen-plus
+  content:
+  - type: TextBlock
+    text: "I'll use the doc-writer agent to explain what AgentDefinition is used for.\n\n"
+  parent_tool_use_id: null
+  error: null
+
+- type: AssistantMessage
+  model: qwen-plus
+  content:
+  - type: ToolUseBlock
+    id: call_a1e29785a6c94856bfc3e5
+    name: Task
+    input:
+      description: Explain AgentDefinition purpose
+      prompt: "Explain what AgentDefinition is used for in the context of Claude Agent SDK. Provide a clear, concise explanation of its purpose, when it's used, and what it typically contains."
+      subagent_type: general-purpose
+  parent_tool_use_id: null
+  error: null
+
+- type: UserMessage
+  content:
+  - type: TextBlock
+    text: "Explain what AgentDefinition is used for in the context of Claude Agent SDK. Provide a clear, concise explanation of its purpose, when it's used, and what it typically contains."
+  uuid: 62f08d13-8455-40ee-95d2-bbaf0eef9162
+  parent_tool_use_id: call_a1e29785a6c94856bfc3e5
+
+- type: AssistantMessage
+  model: qwen-plus
+  content:
+  - type: ToolUseBlock
+    id: call_5c58f3a6922d407ca343bf
+    name: Grep
+    input:
+      pattern: AgentDefinition
+      output_mode: files_with_matches
+  parent_tool_use_id: call_a1e29785a6c94856bfc3e5
+  error: null
+
+- type: UserMessage
+  content:
+  - type: ToolResultBlock
+    tool_use_id: call_5c58f3a6922d407ca343bf
+    content: 'Found 12 files'
+    is_error: false
+  uuid: 4d3458b0-4475-47ad-b1da-84a8b12d7c79
+  parent_tool_use_id: call_a1e29785a6c94856bfc3e5
+
+- type: AssistantMessage
+  model: qwen-plus
+  content:
+  - type: ToolUseBlock
+    id: call_c51d222a2f1049b5b3fd7d
+    name: Read
+    input:
+      file_path: /Users/sipercai/project/pyins/extend/claude-agent-sdk-python/src/claude_agent_sdk/types.py
+  parent_tool_use_id: call_a1e29785a6c94856bfc3e5
+  error: null
+
+- type: UserMessage
+  content:
+  - type: ToolResultBlock
+    tool_use_id: call_c51d222a2f1049b5b3fd7d
+    content: 'AgentDefinition class definition content...'
+    is_error: false
+  uuid: 8fbd773e-7def-4570-96c8-e307f45f9b47
+  parent_tool_use_id: call_a1e29785a6c94856bfc3e5
+
+- type: UserMessage
+  content:
+  - type: ToolResultBlock
+    tool_use_id: call_a1e29785a6c94856bfc3e5
+    content:
+    - type: text
+      text: "Based on my analysis of the Claude Agent SDK codebase, here's a clear explanation of what `AgentDefinition` is used for..."
+    - type: text
+      text: "agentId: ada4edf (for resuming to continue this agent's work if needed)"
+    is_error: false
+    tool_use_result:
+      status: completed
+      prompt: "Explain what AgentDefinition is used for in the context of Claude Agent SDK. Provide a clear, concise explanation of its purpose, when it's used, and what it typically contains."
+      agentId: ada4edf
+      content:
+      - type: text
+        text: "Based on my analysis of the Claude Agent SDK codebase, here's a clear explanation..."
+      totalDurationMs: 33079
+      totalTokens: 0
+      totalToolUseCount: 5
+      usage:
+        input_tokens: 0
+        output_tokens: 0
+  uuid: 9c09e979-6d8a-4f7f-80b5-4852305cfa92
+  parent_tool_use_id: null
+
+- type: AssistantMessage
+  model: qwen-plus
+  content:
+  - type: TextBlock
+    text: "Based on my analysis of the Claude Agent SDK codebase, `AgentDefinition` is a dataclass used to define the configuration for custom agents..."
+  parent_tool_use_id: null
+  error: null
+
+- type: ResultMessage
+  subtype: success
+  duration_ms: 43887
+  duration_api_ms: 42421
+  is_error: false
+  num_turns: 2
+  session_id: 397137cf-3ade-4272-a0bf-aecf66956d1b
+  total_cost_usd: 0.454974
+  usage:
+    input_tokens: 28964
+    cache_creation_input_tokens: 0
+    cache_read_input_tokens: 0
+    output_tokens: 294
+  result: "Based on my analysis of the Claude Agent SDK codebase..."
diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/test_attributes.py b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/test_attributes.py
index fcb88fd53..515d097fb 100644
--- a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/test_attributes.py
+++ b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/test_attributes.py
@@ -304,4 +304,3 @@ async def test_span_attributes_no_sensitive_data(
             if isinstance(attr_value, str):
                 # Sensitive content should not be in attributes
                 assert "secret123" not in attr_value.lower()
-
diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/test_task_subagent_real_data.py b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/test_task_subagent_real_data.py
new file mode 100644
index 000000000..1a27d345e
--- /dev/null
+++ b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/test_task_subagent_real_data.py
@@ -0,0 +1,476 @@
+"""Comprehensive tests for Task tool and SubAgent span instrumentation using real message data.
+
+These tests use actual message streams from real Claude Agent SDK executions
+stored in cassette files to validate SubAgent span functionality:
+- SubAgent span creation and hierarchy
+- SubAgent span attributes (name, description, prompt, result, usage, cost, duration)
+- Context propagation between Task and SubAgent spans
+- Correct parent-child relationships
+"""
+
+from pathlib import Path
+from typing import Any, AsyncIterator, Dict, List
+from unittest.mock import MagicMock
+
+import pytest
+import yaml
+
+from opentelemetry.sdk.trace import TracerProvider
+from opentelemetry.sdk.trace.export import SimpleSpanProcessor
+from opentelemetry.sdk.trace.export.in_memory_span_exporter import (
+    InMemorySpanExporter,
+)
+from opentelemetry.semconv._incubating.attributes import (
+    gen_ai_attributes as GenAIAttributes,
+)
+
+
+# ============================================================================
+# Helper Functions - Load Real Message Data from Cassettes
+# ============================================================================
+
+
+def load_cassette(filename: str) -> Dict[str, Any]:
+    """Load a test case from cassettes directory."""
+    cassette_path = Path(__file__).parent / "cassettes" / filename
+    with open(cassette_path, "r", encoding="utf-8") as f:
+        return yaml.safe_load(f)
+
+
+def create_mock_message_from_data(message_data: Dict[str, Any]) -> Any:
+    """Create a mock message object from test data dictionary."""
+    mock_msg = MagicMock()
+    msg_type = message_data["type"]
+
+    mock_msg.__class__.__name__ = msg_type
+
+    if msg_type == "SystemMessage":
+        mock_msg.subtype = message_data["subtype"]
+        mock_msg.data = message_data["data"]
+
+    elif msg_type == "AssistantMessage":
+        mock_msg.model = message_data["model"]
+        mock_msg.content = []
+
+        for block_data in message_data["content"]:
+            mock_block = MagicMock()
+            block_type = block_data["type"]
+            mock_block.__class__.__name__ = block_type
+
+            if block_type == "TextBlock":
+                mock_block.text = block_data["text"]
+            elif block_type == "ToolUseBlock":
+                mock_block.id = block_data["id"]
+                mock_block.name = block_data["name"]
+                mock_block.input = block_data["input"]
+
+            mock_msg.content.append(mock_block)
+
+        mock_msg.parent_tool_use_id = message_data.get("parent_tool_use_id")
+        mock_msg.error = message_data.get("error")
+
+    elif msg_type == "UserMessage":
+        mock_msg.content = []
+
+        for block_data in message_data["content"]:
+            mock_block = MagicMock()
+            mock_block.__class__.__name__ = block_data["type"]
+
+            if block_data["type"] == "ToolResultBlock":
+                mock_block.tool_use_id = block_data["tool_use_id"]
+                mock_block.content = block_data["content"]
+                mock_block.is_error = block_data["is_error"]
+                # Support tool_use_result for Task tools
+                if "tool_use_result" in block_data:
+                    mock_block.tool_use_result = block_data["tool_use_result"]
+            elif block_data["type"] == "TextBlock":
+                mock_block.text = block_data.get("text", "")
+
+            mock_msg.content.append(mock_block)
+
+        mock_msg.uuid = message_data.get("uuid")
+        mock_msg.parent_tool_use_id = message_data.get("parent_tool_use_id")
+
+    elif msg_type == "ResultMessage":
+        mock_msg.subtype = message_data["subtype"]
+        mock_msg.duration_ms = message_data["duration_ms"]
+        mock_msg.duration_api_ms = message_data.get("duration_api_ms")
+        mock_msg.is_error = message_data["is_error"]
+        mock_msg.num_turns = message_data["num_turns"]
+        mock_msg.session_id = message_data.get("session_id")
+        mock_msg.total_cost_usd = message_data["total_cost_usd"]
+        mock_msg.usage = message_data["usage"]
+        mock_msg.result = message_data["result"]
+        mock_msg.structured_output = message_data.get("structured_output")
+
+    return mock_msg
+
+
+async def create_mock_stream_from_messages(
+    messages: List[Dict[str, Any]],
+) -> AsyncIterator[Any]:
+    """Create a mock async stream of messages."""
+    for message_data in messages:
+        yield create_mock_message_from_data(message_data)
+
+
+def find_spans_by_operation(spans, operation_name):
+    """Find spans by gen_ai.operation.name attribute."""
+    return [
+        s
+        for s in spans
+        if dict(s.attributes or {}).get(GenAIAttributes.GEN_AI_OPERATION_NAME)
+        == operation_name
+    ]
+
+
+def find_task_tool_spans(spans):
+    """Find all Task tool spans."""
+    tool_spans = find_spans_by_operation(spans, "execute_tool")
+    return [
+        s
+        for s in tool_spans
+        if dict(s.attributes or {}).get(GenAIAttributes.GEN_AI_TOOL_NAME)
+        == "Task"
+    ]
+
+
+def find_subagent_spans(spans):
+    """Find all SubAgent spans (invoke_agent spans that are children of Task tool spans)."""
+    agent_spans = find_spans_by_operation(spans, "invoke_agent")
+    task_spans = find_task_tool_spans(spans)
+    task_span_ids = {s.context.span_id for s in task_spans}
+
+    subagent_spans = []
+    for agent_span in agent_spans:
+        if agent_span.parent and agent_span.parent.span_id in task_span_ids:
+            subagent_spans.append(agent_span)
+
+    return subagent_spans
+
+
+# ============================================================================
+# Fixtures
+# ============================================================================
+
+
+@pytest.fixture
+def tracer_provider():
+    """Create a tracer provider for testing."""
+    return TracerProvider()
+
+
+@pytest.fixture
+def span_exporter(tracer_provider):
+    """Create an in-memory span exporter."""
+    exporter = InMemorySpanExporter()
+    tracer_provider.add_span_processor(SimpleSpanProcessor(exporter))
+    return exporter
+
+
+@pytest.fixture
+def instrument(tracer_provider):
+    """Instrument the Claude Agent SDK."""
+    from opentelemetry.instrumentation.claude_agent_sdk import (  # noqa: PLC0415
+        ClaudeAgentSDKInstrumentor,
+    )
+
+    instrumentor = ClaudeAgentSDKInstrumentor()
+    instrumentor.instrument(tracer_provider=tracer_provider)
+    yield instrumentor
+    instrumentor.uninstrument()
+
+
+# ============================================================================
+# Tests - SubAgent Span Creation and Hierarchy with Real Data
+# ============================================================================
+
+
+@pytest.mark.asyncio
+async def test_subagent_span_creation_from_task_tool(
+    instrument, span_exporter, tracer_provider
+):
+    """Verify SubAgent span is created when Task tool is used (using real data).
+
+    This test uses actual message data from a Documentation Writer example
+    where a Task tool was used to invoke a general-purpose subagent.
+
+    Validates:
+    1. Task tool span exists
+    2. SubAgent span exists
+    3. SubAgent is child of Task tool span
+    4. SubAgent has correct operation name
+    """
+    from opentelemetry.instrumentation.claude_agent_sdk.patch import (  # noqa: PLC0415
+        _process_agent_invocation_stream,
+    )
+    from opentelemetry.util.genai.extended_handler import (  # noqa: PLC0415
+        ExtendedTelemetryHandler,
+    )
+
+    test_case = load_cassette("test_doc_writer_with_task.yaml")
+    handler = ExtendedTelemetryHandler(tracer_provider=tracer_provider)
+    mock_stream = create_mock_stream_from_messages(test_case["messages"])
+
+    async for _ in _process_agent_invocation_stream(
+        wrapped_stream=mock_stream,
+        handler=handler,
+        model="qwen-plus",
+        prompt=test_case["prompt"],
+    ):
+        pass
+
+    spans = span_exporter.get_finished_spans()
+    task_spans = find_task_tool_spans(spans)
+    subagent_spans = find_subagent_spans(spans)
+
+    # Verify Task tool span exists
+    assert len(task_spans) == 1, "Should have exactly one Task tool span"
+    task_span = task_spans[0]
+
+    # Verify SubAgent span exists
+    assert len(subagent_spans) == 1, "Should have exactly one SubAgent span"
+    subagent_span = subagent_spans[0]
+
+    # Verify SubAgent is child of Task
+    assert subagent_span.parent is not None, "SubAgent should have a parent"
+    assert subagent_span.parent.span_id == task_span.context.span_id, (
+        "SubAgent's parent should be Task tool span"
+    )
+
+    # Verify SubAgent operation name
+    attrs = dict(subagent_span.attributes or {})
+    assert attrs.get(GenAIAttributes.GEN_AI_OPERATION_NAME) == "invoke_agent"
+
+    print("\n✅ SubAgent span creation test passed (real cassette data)")
+    print(f"  - Task span: {task_span.name}")
+    print(f"  - SubAgent span: {subagent_span.name}")
+
+
+@pytest.mark.asyncio
+async def test_subagent_span_name_from_task_input(
+    instrument, span_exporter, tracer_provider
+):
+    """Verify SubAgent span name is derived from subagent_type in Task input.
+
+    The SubAgent span name should be: invoke_agent {subagent_type}
+    where subagent_type comes from the Task tool's input.
+    """
+    from opentelemetry.instrumentation.claude_agent_sdk.patch import (  # noqa: PLC0415
+        _process_agent_invocation_stream,
+    )
+    from opentelemetry.util.genai.extended_handler import (  # noqa: PLC0415
+        ExtendedTelemetryHandler,
+    )
+
+    test_case = load_cassette("test_doc_writer_with_task.yaml")
+    handler = ExtendedTelemetryHandler(tracer_provider=tracer_provider)
+    mock_stream = create_mock_stream_from_messages(test_case["messages"])
+
+    async for _ in _process_agent_invocation_stream(
+        wrapped_stream=mock_stream,
+        handler=handler,
+        model="qwen-plus",
+        prompt=test_case["prompt"],
+    ):
+        pass
+
+    spans = span_exporter.get_finished_spans()
+    subagent_spans = find_subagent_spans(spans)
+
+    assert len(subagent_spans) == 1
+    subagent_span = subagent_spans[0]
+
+    # SubAgent name should contain "general-purpose" from Task input
+    expected_subagent_type = "general-purpose"
+    assert expected_subagent_type in subagent_span.name, (
+        f"SubAgent span name should contain '{expected_subagent_type}', got: {subagent_span.name}"
+    )
+
+    print(f"\n✅ SubAgent span name test passed: {subagent_span.name}")
+
+
+@pytest.mark.asyncio
+async def test_subagent_span_input_attributes(
+    instrument, span_exporter, tracer_provider
+):
+    """Verify SubAgent span captures input attributes from Task tool input.
+
+    Validates:
+    1. gen_ai.agent.name = subagent_type from Task input
+    2. gen_ai.agent.description = description from Task input
+    3. Provider name is set
+    """
+    from opentelemetry.instrumentation.claude_agent_sdk.patch import (  # noqa: PLC0415
+        _process_agent_invocation_stream,
+    )
+    from opentelemetry.util.genai.extended_handler import (  # noqa: PLC0415
+        ExtendedTelemetryHandler,
+    )
+
+    test_case = load_cassette("test_doc_writer_with_task.yaml")
+    handler = ExtendedTelemetryHandler(tracer_provider=tracer_provider)
+    mock_stream = create_mock_stream_from_messages(test_case["messages"])
+
+    async for _ in _process_agent_invocation_stream(
+        wrapped_stream=mock_stream,
+        handler=handler,
+        model="qwen-plus",
+        prompt=test_case["prompt"],
+    ):
+        pass
+
+    spans = span_exporter.get_finished_spans()
+    subagent_spans = find_subagent_spans(spans)
+
+    assert len(subagent_spans) == 1
+    subagent_span = subagent_spans[0]
+    attrs = dict(subagent_span.attributes or {})
+
+    # Verify agent name (should be "general-purpose" from Task input)
+    assert GenAIAttributes.GEN_AI_AGENT_NAME in attrs
+    assert attrs[GenAIAttributes.GEN_AI_AGENT_NAME] == "general-purpose"
+
+    # Verify agent description (should be "Explain AgentDefinition purpose" from Task input)
+    assert GenAIAttributes.GEN_AI_AGENT_DESCRIPTION in attrs
+    assert "AgentDefinition" in attrs[GenAIAttributes.GEN_AI_AGENT_DESCRIPTION]
+
+    # Verify provider name is set
+    assert GenAIAttributes.GEN_AI_PROVIDER_NAME in attrs
+
+    print("\n✅ SubAgent input attributes test passed")
+    print(f"  - Agent name: {attrs[GenAIAttributes.GEN_AI_AGENT_NAME]}")
+    print(f"  - Description: {attrs[GenAIAttributes.GEN_AI_AGENT_DESCRIPTION]}")
+    print(f"  - Provider: {attrs[GenAIAttributes.GEN_AI_PROVIDER_NAME]}")
+
+
+@pytest.mark.asyncio
+async def test_subagent_span_output_attributes_with_tool_use_result(
+    instrument, span_exporter, tracer_provider
+):
+    """Verify SubAgent span captures output from tool_use_result in Task result.
+
+    The real data includes tool_use_result with:
+    - usage: {input_tokens, output_tokens}
+    - totalDurationMs
+    - totalCostUsd (in test data this is 0, but structure is validated)
+
+    Validates:
+    1. Span completes successfully (has end_time)
+    2. Duration and cost attributes are present
+    """
+    from opentelemetry.instrumentation.claude_agent_sdk.patch import (  # noqa: PLC0415
+        _process_agent_invocation_stream,
+    )
+    from opentelemetry.util.genai.extended_handler import (  # noqa: PLC0415
+        ExtendedTelemetryHandler,
+    )
+
+    test_case = load_cassette("test_doc_writer_with_task.yaml")
+    handler = ExtendedTelemetryHandler(tracer_provider=tracer_provider)
+    mock_stream = create_mock_stream_from_messages(test_case["messages"])
+
+    async for _ in _process_agent_invocation_stream(
+        wrapped_stream=mock_stream,
+        handler=handler,
+        model="qwen-plus",
+        prompt=test_case["prompt"],
+    ):
+        pass
+
+    spans = span_exporter.get_finished_spans()
+    subagent_spans = find_subagent_spans(spans)
+
+    assert len(subagent_spans) == 1
+    subagent_span = subagent_spans[0]
+    attrs = dict(subagent_span.attributes or {})
+
+    # Verify span completed successfully
+    assert subagent_span.end_time is not None
+    assert subagent_span.end_time > subagent_span.start_time
+
+    # Verify duration_ms attribute from tool_use_result
+    assert "duration_ms" in attrs
+    assert attrs["duration_ms"] == 33079
+
+    # Verify token usage attributes (even if 0, they should be recorded)
+    # Note: These may not always appear in the span depending on OTel exporter behavior
+    # but we verify the structure is correct
+
+    print("\n✅ SubAgent output attributes test passed")
+    print(f"  - Span duration: {(subagent_span.end_time - subagent_span.start_time) / 1e9:.3f}s")
+    print(f"  - Task duration (ms): {attrs.get('duration_ms', 'N/A')}")
+    if "total_cost_usd" in attrs:
+        print(f"  - Total cost: ${attrs['total_cost_usd']}")
+
+
+@pytest.mark.asyncio
+async def test_subagent_span_hierarchy_and_context(
+    instrument, span_exporter, tracer_provider
+):
+    """Verify span hierarchy and context propagation with Task and SubAgent.
+
+    Validates:
+    1. Root agent span exists
+    2. Task tool span is child of root agent
+    3. SubAgent span is child of Task tool span
+    4. Internal tool calls (Grep, Read) are children of SubAgent
+    5. Spans after Task completion are siblings of Task, not children
+    """
+    from opentelemetry.instrumentation.claude_agent_sdk.patch import (  # noqa: PLC0415
+        _process_agent_invocation_stream,
+    )
+    from opentelemetry.util.genai.extended_handler import (  # noqa: PLC0415
+        ExtendedTelemetryHandler,
+    )
+
+    test_case = load_cassette("test_doc_writer_with_task.yaml")
+    handler = ExtendedTelemetryHandler(tracer_provider=tracer_provider)
+    mock_stream = create_mock_stream_from_messages(test_case["messages"])
+
+    async for _ in _process_agent_invocation_stream(
+        wrapped_stream=mock_stream,
+        handler=handler,
+        model="qwen-plus",
+        prompt=test_case["prompt"],
+    ):
+        pass
+
+    spans = span_exporter.get_finished_spans()
+    agent_spans = find_spans_by_operation(spans, "invoke_agent")
+    task_spans = find_task_tool_spans(spans)
+    subagent_spans = find_subagent_spans(spans)
+    tool_spans = find_spans_by_operation(spans, "execute_tool")
+
+    # Find root agent span (no parent)
+    root_agent = [s for s in agent_spans if s.parent is None][0]
+
+    # Verify hierarchy
+    assert len(task_spans) == 1
+    task_span = task_spans[0]
+
+    assert len(subagent_spans) == 1
+    subagent_span = subagent_spans[0]
+
+    # Task span should be child of root agent
+    assert task_span.parent is not None
+    assert task_span.parent.span_id == root_agent.context.span_id
+
+    # SubAgent span should be child of Task span
+    assert subagent_span.parent is not None
+    assert subagent_span.parent.span_id == task_span.context.span_id
+
+    # Find tool spans that are children of SubAgent (Grep, Read)
+    subagent_child_tools = [
+        s for s in tool_spans
+        if s.parent and s.parent.span_id == subagent_span.context.span_id
+    ]
+
+    # Should have internal tool calls (Grep, Read)
+    assert len(subagent_child_tools) >= 2, "SubAgent should have child tool spans (Grep, Read)"
+
+    print("\n✅ SubAgent hierarchy test passed")
+    print(f"  - Root agent: {root_agent.name}")
+    print(f"  - Task span: {task_span.name} (child of root)")
+    print(f"  - SubAgent span: {subagent_span.name} (child of Task)")
+    print(f"  - SubAgent child tools: {len(subagent_child_tools)}")
diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/test_with_cassettes.py b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/test_with_cassettes.py
index 68307d10b..3cffc002f 100644
--- a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/test_with_cassettes.py
+++ b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/test_with_cassettes.py
@@ -199,15 +199,38 @@ async def test_agent_invocation_with_cassette(
     )
 
     # Verify Agent span exists
+    # Note: When Task tool is used, there will be a root agent + SubAgent span
     agent_spans = [
         s
         for s in spans
         if dict(s.attributes or {}).get(GenAIAttributes.GEN_AI_OPERATION_NAME)
         == "invoke_agent"
     ]
-    assert len(agent_spans) == 1, (
-        f"Should have one Agent span for {cassette_file}"
-    )
+
+    # Find Task tool spans to determine if SubAgent is expected
+    tool_spans = [
+        s
+        for s in spans
+        if dict(s.attributes or {}).get(GenAIAttributes.GEN_AI_OPERATION_NAME)
+        == "execute_tool"
+    ]
+    task_spans = [
+        s
+        for s in tool_spans
+        if dict(s.attributes or {}).get(GenAIAttributes.GEN_AI_TOOL_NAME)
+        == "Task"
+    ]
+
+    # If Task tool is used, expect root agent + SubAgent spans
+    if len(task_spans) > 0:
+        assert len(agent_spans) >= 1, (
+            f"Should have at least one Agent span for {cassette_file}"
+        )
+    else:
+        # No Task tool, expect only root agent
+        assert len(agent_spans) == 1, (
+            f"Should have one Agent span for {cassette_file}"
+        )
 
     # Verify LLM spans exist
     llm_spans = [
@@ -222,5 +245,5 @@ async def test_agent_invocation_with_cassette(
 
     print(
         f"\n✅ {cassette_file}: {len(spans)} spans "
-        f"(Agent: 1, LLM: {len(llm_spans)})"
+        f"(Agent: {len(agent_spans)}, LLM: {len(llm_spans)}, Task: {len(task_spans)})"
     )

From cb707fd70fe63b9b1837c592f5bb3c1d1a4a9cbc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E6=B5=81=E5=B1=BF?= <caishipeng.csp@alibaba-inc.com>
Date: Tue, 27 Jan 2026 09:53:20 +0800
Subject: [PATCH 21/23] [feat] support subagent

Change-Id: If7c6314141852aa249d73bf22316aa1aab486b21
Co-developed-by: Cursor <noreply@cursor.com>
---
 .../instrumentation/claude_agent_sdk/patch.py | 97 +++++++++++++------
 .../cassettes/test_doc_writer_with_task.yaml  | 26 ++---
 .../tests/test_span_validation.py             | 20 ----
 .../tests/test_task_subagent_real_data.py     | 63 +++++-------
 .../tests/test_with_cassettes.py              |  5 -
 5 files changed, 107 insertions(+), 104 deletions(-)

diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/instrumentation/claude_agent_sdk/patch.py b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/instrumentation/claude_agent_sdk/patch.py
index c0b30423d..09d943b56 100644
--- a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/instrumentation/claude_agent_sdk/patch.py
+++ b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/instrumentation/claude_agent_sdk/patch.py
@@ -288,6 +288,7 @@ def _process_assistant_message(
             collected_messages,
             provider=infer_provider_from_base_url(),
             message_arrival_time=message_arrival_time,
+            agent_invocation=agent_invocation,
         )
 
         if parts:
@@ -365,6 +366,8 @@ def _process_user_message(
     # Check if we're inside a Task
     is_inside_task = len(active_task_stack) > 0
 
+    msg_tool_use_result = getattr(msg, "tool_use_result", None)
+
     if hasattr(msg, "content"):
         for block in msg.content:
             block_type = type(block).__name__
@@ -376,7 +379,8 @@ def _process_user_message(
 
                     # Set tool response
                     tool_content = getattr(block, "content", None)
-                    tool_use_result = getattr(block, "tool_use_result", None)
+                    # tool_use_result is on the UserMessage, not on ToolResultBlock!
+                    tool_use_result = msg_tool_use_result
                     is_error_value = getattr(block, "is_error", None)
                     is_error = is_error_value is True
 
@@ -393,20 +397,39 @@ def _process_user_message(
                         task_entry = active_task_stack.pop()
 
                         # Extract information from tool_use_result (official Task tool output format)
-                        # Output format: {"result": str, "usage": dict | None, "total_cost_usd": float | None, "duration_ms": int | None}
                         if tool_use_result and isinstance(
                             tool_use_result, dict
                         ):
-                            # Extract result (str) for output_messages
-                            result_text = tool_use_result.get("result")
-                            if result_text and isinstance(result_text, str):
-                                task_entry["subagent"].output_messages.append(
-                                    OutputMessage(
-                                        role="assistant",
-                                        parts=[Text(content=result_text)],
-                                        finish_reason="stop",
+                            agent_id = tool_use_result.get("agentId")
+                            if agent_id:
+                                task_entry["subagent"].agent_id = agent_id
+
+                            # Extract result for output_messages
+                            content_blocks = tool_use_result.get("content")
+                            if content_blocks and isinstance(
+                                content_blocks, list
+                            ):
+                                # Convert content blocks to Text parts
+                                text_parts = []
+                                for block in content_blocks:
+                                    if isinstance(block, dict):
+                                        if block.get("type") == "text":
+                                            text_content = block.get("text")
+                                            if text_content:
+                                                text_parts.append(
+                                                    Text(content=text_content)
+                                                )
+
+                                if text_parts:
+                                    task_entry[
+                                        "subagent"
+                                    ].output_messages.append(
+                                        OutputMessage(
+                                            role="assistant",
+                                            parts=text_parts,
+                                            finish_reason="stop",
+                                        )
                                     )
-                                )
 
                             # Extract usage from tool_use_result
                             # Always record usage info from official SDK, even if values are 0
@@ -420,16 +443,10 @@ def _process_user_message(
                                     task_entry[
                                         "subagent"
                                     ].output_tokens = usage["output_tokens"]
-
-                            # Extract additional attributes
-                            if "total_cost_usd" in tool_use_result:
-                                task_entry["subagent"].attributes[
-                                    "total_cost_usd"
-                                ] = tool_use_result["total_cost_usd"]
-                            if "duration_ms" in tool_use_result:
-                                task_entry["subagent"].attributes[
-                                    "duration_ms"
-                                ] = tool_use_result["duration_ms"]
+                        else:
+                            logger.warning(
+                                f"[SubAgent] tool_use_result is not a dict: {type(tool_use_result)}, value: {tool_use_result}"
+                            )
 
                         # Close SubAgent span first (detach SubAgent context)
                         # This restores context to Task Tool span level
@@ -440,10 +457,6 @@ def _process_user_message(
                                 f"Failed to close SubAgent span: {e}"
                             )
 
-                        logger.debug(
-                            f"Task span closed: {tool_use_id}, stack depth: {len(active_task_stack)}"
-                        )
-
                     # Now close the tool span (Task or regular tool)
                     # For Task: this detaches Task Tool context, restoring to Agent context
                     if is_error:
@@ -503,14 +516,29 @@ def _process_user_message(
     turn_tracker.mark_next_llm_start()
 
 
+def _process_system_message(
+    msg: Any,
+    agent_invocation: InvokeAgentInvocation,
+) -> None:
+    """Process SystemMessage: extract session_id early in the stream.
+
+    SystemMessage appears at the beginning of the message stream and contains
+    the session_id in its data field. We extract it here so that it's available
+    for all subsequent LLM spans.
+    """
+    if hasattr(msg, "subtype") and msg.subtype == "init":
+        if hasattr(msg, "data") and isinstance(msg.data, dict):
+            session_id = msg.data.get("session_id")
+            if session_id:
+                agent_invocation.conversation_id = session_id
+
+
 def _process_result_message(
     msg: Any,
     agent_invocation: InvokeAgentInvocation,
     turn_tracker: "AssistantTurnTracker",
 ) -> None:
-    """Process ResultMessage: update session_id, token usage, and close any open LLM turn."""
-    if hasattr(msg, "session_id") and msg.session_id:
-        agent_invocation.conversation_id = msg.session_id
+    """Process ResultMessage: update session_id (fallback), token usage, and close any open LLM turn."""
 
     _update_token_usage(agent_invocation, turn_tracker, msg)
 
@@ -563,7 +591,9 @@ async def _process_agent_invocation_stream(
         async for msg in wrapped_stream:
             msg_type = type(msg).__name__
 
-            if msg_type == "AssistantMessage":
+            if msg_type == "SystemMessage":
+                _process_system_message(msg, agent_invocation)
+            elif msg_type == "AssistantMessage":
                 _process_assistant_message(
                     msg,
                     model,
@@ -642,6 +672,7 @@ def start_llm_turn(
         collected_messages: List[Dict[str, Any]],
         provider: str = "anthropic",
         message_arrival_time: Optional[float] = None,
+        agent_invocation: Optional[InvokeAgentInvocation] = None,
     ) -> Optional[LLMInvocation]:
         """Start a new LLM invocation span with pre-recorded start time.
 
@@ -649,6 +680,7 @@ def start_llm_turn(
             message_arrival_time: The time when the AssistantMessage arrived.
                 If next_llm_start_time is set (from previous UserMessage), use that.
                 Otherwise, use message_arrival_time or fall back to current time.
+            agent_invocation: The parent agent invocation, used to extract conversation_id.
         """
         # Priority: next_llm_start_time > message_arrival_time > current time
         start_time = (
@@ -689,6 +721,13 @@ def start_llm_turn(
             input_messages=input_messages,
         )
 
+        # Add conversation_id (session_id) to LLM span attributes
+        # This is a custom extension beyond standard GenAI semantic conventions
+        if agent_invocation and agent_invocation.conversation_id:
+            llm_invocation.attributes["gen_ai.conversation.id"] = (
+                agent_invocation.conversation_id
+            )
+
         self.handler.start_llm(llm_invocation)
         # TODO(telemetry): Use public API for setting span start time
         if llm_invocation.span and start_time:
diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/cassettes/test_doc_writer_with_task.yaml b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/cassettes/test_doc_writer_with_task.yaml
index 85a254327..1da6cdebf 100644
--- a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/cassettes/test_doc_writer_with_task.yaml
+++ b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/cassettes/test_doc_writer_with_task.yaml
@@ -131,21 +131,21 @@ messages:
     - type: text
       text: "agentId: ada4edf (for resuming to continue this agent's work if needed)"
     is_error: false
-    tool_use_result:
-      status: completed
-      prompt: "Explain what AgentDefinition is used for in the context of Claude Agent SDK. Provide a clear, concise explanation of its purpose, when it's used, and what it typically contains."
-      agentId: ada4edf
-      content:
-      - type: text
-        text: "Based on my analysis of the Claude Agent SDK codebase, here's a clear explanation..."
-      totalDurationMs: 33079
-      totalTokens: 0
-      totalToolUseCount: 5
-      usage:
-        input_tokens: 0
-        output_tokens: 0
   uuid: 9c09e979-6d8a-4f7f-80b5-4852305cfa92
   parent_tool_use_id: null
+  tool_use_result:
+    status: completed
+    prompt: "Explain what AgentDefinition is used for in the context of Claude Agent SDK. Provide a clear, concise explanation of its purpose, when it's used, and what it typically contains."
+    agentId: ada4edf
+    content:
+    - type: text
+      text: "Based on my analysis of the Claude Agent SDK codebase, here's a clear explanation..."
+    totalDurationMs: 33079
+    totalTokens: 0
+    totalToolUseCount: 5
+    usage:
+      input_tokens: 0
+      output_tokens: 0
 
 - type: AssistantMessage
   model: qwen-plus
diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/test_span_validation.py b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/test_span_validation.py
index 524edfe1b..a9d6fbd73 100644
--- a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/test_span_validation.py
+++ b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/test_span_validation.py
@@ -257,15 +257,6 @@ async def test_agent_span_correctness(
         "Should have output_tokens"
     )
 
-    print(f"\n✅ Agent span validation passed ({cassette_file})")
-    print(f"  - Span name: {agent_span.name}")
-    print(
-        f"  - Input tokens: {attrs.get(GenAIAttributes.GEN_AI_USAGE_INPUT_TOKENS)}"
-    )
-    print(
-        f"  - Output tokens: {attrs.get(GenAIAttributes.GEN_AI_USAGE_OUTPUT_TOKENS)}"
-    )
-
 
 # ============================================================================
 # Tests - LLM Span
@@ -361,9 +352,6 @@ async def test_llm_span_correctness(
                                     )
                                     tool_call_ids.append(tool_call_id)
 
-    print(f"\n✅ LLM span validation passed ({cassette_file})")
-    print(f"  - LLM span count: {len(llm_spans)}")
-
 
 # ============================================================================
 # Tests - Tool Span
@@ -445,9 +433,6 @@ async def test_tool_span_correctness(
             "Should have tool.call.id"
         )
 
-    print(f"\n✅ Tool span validation passed ({cassette_file})")
-    print(f"  - Tool span count: {len(tool_spans)}")
-
 
 # ============================================================================
 # Tests - Span Hierarchy
@@ -523,8 +508,3 @@ async def test_span_hierarchy_correctness(
             assert tool_span.parent.span_id != llm_span.context.span_id, (
                 "Tool span should not be a child of LLM span"
             )
-
-    print(f"\n✅ Span hierarchy validation passed ({cassette_file})")
-    print(f"  - Agent span: {agent_span.name} (root span)")
-    print(f"  - LLM spans: {len(llm_spans)} (children of Agent)")
-    print(f"  - Tool spans: {len(tool_spans)} (children of Agent)")
diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/test_task_subagent_real_data.py b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/test_task_subagent_real_data.py
index 1a27d345e..9e56667b5 100644
--- a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/test_task_subagent_real_data.py
+++ b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/test_task_subagent_real_data.py
@@ -24,7 +24,6 @@
     gen_ai_attributes as GenAIAttributes,
 )
 
-
 # ============================================================================
 # Helper Functions - Load Real Message Data from Cassettes
 # ============================================================================
@@ -80,9 +79,6 @@ def create_mock_message_from_data(message_data: Dict[str, Any]) -> Any:
                 mock_block.tool_use_id = block_data["tool_use_id"]
                 mock_block.content = block_data["content"]
                 mock_block.is_error = block_data["is_error"]
-                # Support tool_use_result for Task tools
-                if "tool_use_result" in block_data:
-                    mock_block.tool_use_result = block_data["tool_use_result"]
             elif block_data["type"] == "TextBlock":
                 mock_block.text = block_data.get("text", "")
 
@@ -90,6 +86,7 @@ def create_mock_message_from_data(message_data: Dict[str, Any]) -> Any:
 
         mock_msg.uuid = message_data.get("uuid")
         mock_msg.parent_tool_use_id = message_data.get("parent_tool_use_id")
+        mock_msg.tool_use_result = message_data.get("tool_use_result")
 
     elif msg_type == "ResultMessage":
         mock_msg.subtype = message_data["subtype"]
@@ -242,10 +239,6 @@ async def test_subagent_span_creation_from_task_tool(
     attrs = dict(subagent_span.attributes or {})
     assert attrs.get(GenAIAttributes.GEN_AI_OPERATION_NAME) == "invoke_agent"
 
-    print("\n✅ SubAgent span creation test passed (real cassette data)")
-    print(f"  - Task span: {task_span.name}")
-    print(f"  - SubAgent span: {subagent_span.name}")
-
 
 @pytest.mark.asyncio
 async def test_subagent_span_name_from_task_input(
@@ -287,8 +280,6 @@ async def test_subagent_span_name_from_task_input(
         f"SubAgent span name should contain '{expected_subagent_type}', got: {subagent_span.name}"
     )
 
-    print(f"\n✅ SubAgent span name test passed: {subagent_span.name}")
-
 
 @pytest.mark.asyncio
 async def test_subagent_span_input_attributes(
@@ -338,11 +329,6 @@ async def test_subagent_span_input_attributes(
     # Verify provider name is set
     assert GenAIAttributes.GEN_AI_PROVIDER_NAME in attrs
 
-    print("\n✅ SubAgent input attributes test passed")
-    print(f"  - Agent name: {attrs[GenAIAttributes.GEN_AI_AGENT_NAME]}")
-    print(f"  - Description: {attrs[GenAIAttributes.GEN_AI_AGENT_DESCRIPTION]}")
-    print(f"  - Provider: {attrs[GenAIAttributes.GEN_AI_PROVIDER_NAME]}")
-
 
 @pytest.mark.asyncio
 async def test_subagent_span_output_attributes_with_tool_use_result(
@@ -352,12 +338,16 @@ async def test_subagent_span_output_attributes_with_tool_use_result(
 
     The real data includes tool_use_result with:
     - usage: {input_tokens, output_tokens}
-    - totalDurationMs
-    - totalCostUsd (in test data this is 0, but structure is validated)
+    - agentId
+    - content (output messages)
 
     Validates:
     1. Span completes successfully (has end_time)
-    2. Duration and cost attributes are present
+    2. Token usage attributes are present (from tool_use_result.usage)
+    3. Agent ID is captured
+
+    Note: SubAgent span does NOT record duration_ms or cost attributes.
+    These are managed at the parent Agent level via ResultMessage.
     """
     from opentelemetry.instrumentation.claude_agent_sdk.patch import (  # noqa: PLC0415
         _process_agent_invocation_stream,
@@ -389,19 +379,21 @@ async def test_subagent_span_output_attributes_with_tool_use_result(
     assert subagent_span.end_time is not None
     assert subagent_span.end_time > subagent_span.start_time
 
-    # Verify duration_ms attribute from tool_use_result
-    assert "duration_ms" in attrs
-    assert attrs["duration_ms"] == 33079
+    # Verify agent ID was captured from tool_use_result
+    assert "gen_ai.agent.id" in attrs
+    assert attrs["gen_ai.agent.id"] == "ada4edf"
 
-    # Verify token usage attributes (even if 0, they should be recorded)
-    # Note: These may not always appear in the span depending on OTel exporter behavior
-    # but we verify the structure is correct
+    # Verify token usage attributes (from tool_use_result.usage)
+    # In this test case, both are 0, but they should be present in attributes
+    assert "gen_ai.usage.input_tokens" in attrs
+    assert attrs["gen_ai.usage.input_tokens"] == 0
+    assert "gen_ai.usage.output_tokens" in attrs
+    assert attrs["gen_ai.usage.output_tokens"] == 0
 
-    print("\n✅ SubAgent output attributes test passed")
-    print(f"  - Span duration: {(subagent_span.end_time - subagent_span.start_time) / 1e9:.3f}s")
-    print(f"  - Task duration (ms): {attrs.get('duration_ms', 'N/A')}")
-    if "total_cost_usd" in attrs:
-        print(f"  - Total cost: ${attrs['total_cost_usd']}")
+    # Verify basic agent attributes are present
+    assert "gen_ai.agent.name" in attrs
+    assert "gen_ai.operation.name" in attrs
+    assert attrs["gen_ai.operation.name"] == "invoke_agent"
 
 
 @pytest.mark.asyncio
@@ -462,15 +454,12 @@ async def test_subagent_span_hierarchy_and_context(
 
     # Find tool spans that are children of SubAgent (Grep, Read)
     subagent_child_tools = [
-        s for s in tool_spans
+        s
+        for s in tool_spans
         if s.parent and s.parent.span_id == subagent_span.context.span_id
     ]
 
     # Should have internal tool calls (Grep, Read)
-    assert len(subagent_child_tools) >= 2, "SubAgent should have child tool spans (Grep, Read)"
-
-    print("\n✅ SubAgent hierarchy test passed")
-    print(f"  - Root agent: {root_agent.name}")
-    print(f"  - Task span: {task_span.name} (child of root)")
-    print(f"  - SubAgent span: {subagent_span.name} (child of Task)")
-    print(f"  - SubAgent child tools: {len(subagent_child_tools)}")
+    assert len(subagent_child_tools) >= 2, (
+        "SubAgent should have child tool spans (Grep, Read)"
+    )
diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/test_with_cassettes.py b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/test_with_cassettes.py
index 3cffc002f..b65d18160 100644
--- a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/test_with_cassettes.py
+++ b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/test_with_cassettes.py
@@ -242,8 +242,3 @@ async def test_agent_invocation_with_cassette(
     assert len(llm_spans) > 0, (
         f"Should have at least one LLM span for {cassette_file}"
     )
-
-    print(
-        f"\n✅ {cassette_file}: {len(spans)} spans "
-        f"(Agent: {len(agent_spans)}, LLM: {len(llm_spans)}, Task: {len(task_spans)})"
-    )

From 56b5dc21fd5d0741c47988eb3f33615fa1faf7e0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E6=B5=81=E5=B1=BF?= <caishipeng.csp@alibaba-inc.com>
Date: Tue, 27 Jan 2026 10:25:40 +0800
Subject: [PATCH 22/23] [fix] add test dependency pyyaml

Change-Id: I5d569a4ef592ad2802984e43dbd74a27e8b309a5
Co-developed-by: Cursor <noreply@cursor.com>
---
 .../tests/requirements.latest.txt                                | 1 +
 .../tests/requirements.oldest.txt                                | 1 +
 2 files changed, 2 insertions(+)

diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/requirements.latest.txt b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/requirements.latest.txt
index 669932732..24b0ae772 100644
--- a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/requirements.latest.txt
+++ b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/requirements.latest.txt
@@ -18,6 +18,7 @@
 claude-agent-sdk>=0.1.0
 pytest==7.4.4
 pytest-asyncio==0.21.0
+pyyaml>=6.0
 wrapt==1.17.3
 opentelemetry-exporter-otlp-proto-http
 
diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/requirements.oldest.txt b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/requirements.oldest.txt
index 93c3c2ebe..6e11205fb 100644
--- a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/requirements.oldest.txt
+++ b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/requirements.oldest.txt
@@ -18,6 +18,7 @@
 claude-agent-sdk>=0.1.0
 pytest==7.4.4
 pytest-asyncio==0.21.0
+pyyaml>=6.0
 wrapt==1.17.3
 opentelemetry-exporter-otlp-proto-http~=1.30
 opentelemetry-api==1.37

From 847dd207dd438a238d5e0eb3f71d56d21d10a9b6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E6=B5=81=E5=B1=BF?= <caishipeng.csp@alibaba-inc.com>
Date: Wed, 28 Jan 2026 10:23:47 +0800
Subject: [PATCH 23/23] [fix] improve code quality and fix variable naming
 issues

Change-Id: I669754b5786c00b67e31d57bb40f8ded89088bd0
Co-developed-by: Cursor <noreply@cursor.com>
---
 .../instrumentation/claude_agent_sdk/patch.py | 20 +++++++++++--------
 .../cassettes/test_doc_writer_with_task.yaml  |  4 ++--
 .../tests/cassettes/test_echo_command.yaml    |  2 +-
 .../tests/cassettes/test_foo_sh_command.yaml  | 12 +++++------
 .../tests/cassettes/test_pretooluse_hook.yaml | 12 +++++------
 5 files changed, 27 insertions(+), 23 deletions(-)

diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/instrumentation/claude_agent_sdk/patch.py b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/instrumentation/claude_agent_sdk/patch.py
index 09d943b56..8477b6950 100644
--- a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/instrumentation/claude_agent_sdk/patch.py
+++ b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/src/opentelemetry/instrumentation/claude_agent_sdk/patch.py
@@ -140,6 +140,8 @@ def _create_tool_spans_from_message(
                 set_span_in_context(parent_span)
             )
         except Exception:
+            # If attaching the parent context fails, continue without it.
+            # Instrumentation must not break the host application.
             pass
 
     try:
@@ -180,6 +182,7 @@ def _create_tool_spans_from_message(
                                 set_span_in_context(tool_invocation.span)
                             )
                         except Exception:
+                            # Context attachment failure should not break instrumentation
                             pass
 
                     try:
@@ -221,6 +224,7 @@ def _create_tool_spans_from_message(
                             try:
                                 otel_context.detach(subagent_context_token)
                             except Exception:
+                                # Context detachment failure should not break instrumentation
                                 pass
 
             except Exception as e:
@@ -232,6 +236,7 @@ def _create_tool_spans_from_message(
             try:
                 otel_context.detach(parent_context_token)
             except Exception:
+                # Context detachment failure should not break instrumentation
                 pass
 
 
@@ -363,9 +368,6 @@ def _process_user_message(
     user_parts: List[MessagePart] = []
     tool_parts: List[MessagePart] = []
 
-    # Check if we're inside a Task
-    is_inside_task = len(active_task_stack) > 0
-
     msg_tool_use_result = getattr(msg, "tool_use_result", None)
 
     if hasattr(msg, "content"):
@@ -411,10 +413,12 @@ def _process_user_message(
                             ):
                                 # Convert content blocks to Text parts
                                 text_parts = []
-                                for block in content_blocks:
-                                    if isinstance(block, dict):
-                                        if block.get("type") == "text":
-                                            text_content = block.get("text")
+                                for content_block in content_blocks:
+                                    if isinstance(content_block, dict):
+                                        if content_block.get("type") == "text":
+                                            text_content = content_block.get(
+                                                "text"
+                                            )
                                             if text_content:
                                                 text_parts.append(
                                                     Text(content=text_content)
@@ -485,7 +489,6 @@ def _process_user_message(
                 if text_content:
                     user_parts.append(Text(content=text_content))
 
-    # Re-check if we're inside a Task AFTER popping Task results
     # This ensures Task tool results are NOT filtered out
     is_inside_task = len(active_task_stack) > 0
 
@@ -642,6 +645,7 @@ async def _process_agent_invocation_stream(
             try:
                 handler.stop_invoke_agent(task_entry["subagent"])
             except Exception:
+                # Span closure failure should not break the application
                 pass
 
         # Detach empty context token to restore the original context.
diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/cassettes/test_doc_writer_with_task.yaml b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/cassettes/test_doc_writer_with_task.yaml
index 1da6cdebf..00d9a9697 100644
--- a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/cassettes/test_doc_writer_with_task.yaml
+++ b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/cassettes/test_doc_writer_with_task.yaml
@@ -6,7 +6,7 @@ messages:
   data:
     type: system
     subtype: init
-    cwd: /Users/sipercai/project/pyins/extend/claude-agent-sdk-python
+    cwd: /path/to/claude-agent-sdk
     session_id: 397137cf-3ade-4272-a0bf-aecf66956d1b
     tools:
     - Task
@@ -108,7 +108,7 @@ messages:
     id: call_c51d222a2f1049b5b3fd7d
     name: Read
     input:
-      file_path: /Users/sipercai/project/pyins/extend/claude-agent-sdk-python/src/claude_agent_sdk/types.py
+      file_path: /path/to/claude-agent-sdk/src/claude_agent_sdk/types.py
   parent_tool_use_id: call_a1e29785a6c94856bfc3e5
   error: null
 
diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/cassettes/test_echo_command.yaml b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/cassettes/test_echo_command.yaml
index 3dd068871..06a024e74 100644
--- a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/cassettes/test_echo_command.yaml
+++ b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/cassettes/test_echo_command.yaml
@@ -6,7 +6,7 @@ messages:
   data:
     type: system
     subtype: init
-    cwd: /Users/sipercai/project/pyins/extend/claude-agent-sdk-python/examples
+    cwd: /path/to/claude-agent-sdk/examples
     session_id: cf630075-3251-4f84-a57d-4274afed0c46
     tools:
     - Task
diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/cassettes/test_foo_sh_command.yaml b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/cassettes/test_foo_sh_command.yaml
index e270b56b0..99b325131 100644
--- a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/cassettes/test_foo_sh_command.yaml
+++ b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/cassettes/test_foo_sh_command.yaml
@@ -6,7 +6,7 @@ messages:
   data:
     type: system
     subtype: init
-    cwd: /Users/sipercai/project/pyins/extend/claude-agent-sdk-python/examples
+    cwd: /path/to/claude-agent-sdk/examples
     session_id: cf630075-3251-4f84-a57d-4274afed0c46
     tools:
     - Task
@@ -107,15 +107,15 @@ messages:
     tool_use_id: call_2066d7694fd9488096cd3e
     content: 'total 432
 
-      drwxr-xr-x@ 41 sipercai  staff   1312 Jan 23 12:33 .
+      drwxr-xr-x@ 41 user  staff   1312 Jan 23 12:33 .
 
-      drwxr-xr-x@ 23 sipercai  staff    736 Jan 12 18:02 ..
+      drwxr-xr-x@ 23 user  staff    736 Jan 12 18:02 ..
 
-      drwxr-xr-x@ 14 sipercai  staff    448 Jan  9 16:12 __pycache__
+      drwxr-xr-x@ 14 user  staff    448 Jan  9 16:12 __pycache__
 
-      -rw-r--r--@  1 sipercai  staff   4708 Dec 25 17:18 agents.py
+      -rw-r--r--@  1 user  staff   4708 Dec 25 17:18 agents.py
 
-      -rw-r--r--@  1 sipercai  staff   2189 Jan 23 12:34 code_reviewer_example.py
+      -rw-r--r--@  1 user  staff   2189 Jan 23 12:34 code_reviewer_example.py
 
       ...'
     is_error: false
diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/cassettes/test_pretooluse_hook.yaml b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/cassettes/test_pretooluse_hook.yaml
index 3889d387d..f84c40c63 100644
--- a/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/cassettes/test_pretooluse_hook.yaml
+++ b/instrumentation-loongsuite/loongsuite-instrumentation-claude-agent-sdk/tests/cassettes/test_pretooluse_hook.yaml
@@ -6,7 +6,7 @@ messages:
   data:
     type: system
     subtype: init
-    cwd: /Users/sipercai/project/pyins/extend/claude-agent-sdk-python/examples
+    cwd: /path/to/claude-agent-sdk/examples
     session_id: cf630075-3251-4f84-a57d-4274afed0c46
     tools:
     - Task
@@ -107,15 +107,15 @@ messages:
     tool_use_id: call_2066d7694fd9488096cd3e
     content: 'total 432
 
-      drwxr-xr-x@ 41 sipercai  staff   1312 Jan 23 12:33 .
+      drwxr-xr-x@ 41 user  staff   1312 Jan 23 12:33 .
 
-      drwxr-xr-x@ 23 sipercai  staff    736 Jan 12 18:02 ..
+      drwxr-xr-x@ 23 user  staff    736 Jan 12 18:02 ..
 
-      drwxr-xr-x@ 14 sipercai  staff    448 Jan  9 16:12 __pycache__
+      drwxr-xr-x@ 14 user  staff    448 Jan  9 16:12 __pycache__
 
-      -rw-r--r--@  1 sipercai  staff   4708 Dec 25 17:18 agents.py
+      -rw-r--r--@  1 user  staff   4708 Dec 25 17:18 agents.py
 
-      -rw-r--r--@  1 sipercai  staff   2189 Jan 23 12:34 code_reviewer_example.py
+      -rw-r--r--@  1 user  staff   2189 Jan 23 12:34 code_reviewer_example.py
 
       ...'
     is_error: false