Skip to content

sysbox-fs fuse-related deadlock #1002

@okhowang

Description

@okhowang

kernel version 6.6
sysbox version 0.6.7
docker version 28.5.1

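Every "docker run --runtime sysbox-runc" command gets stuck: the "sysbox-runc init" process of each new container hangs in uninterruptible sleep (state D), as shown below.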

      1 3985280 3985280    8257 ?             -1 Sl       0   0:01 /usr/bin/containerd-shim-runc-v2 -namespace moby -id 20a69783071597167d651dc3fa0ab9cb584d38f9c107ae79d759488b69594306 -address /run
3985280 3985290 3985280    8257 ?             -1 Sl       0   0:00  \_ /usr/bin/sysbox-runc --root /var/run/docker/runtime-runc/moby --log /run/containerd/io.containerd.runtime.v2.task/moby/20a69783
3985290 3985321 3985321 3985321 ?             -1 Ds   100000   0:00      \_ /usr/bin/sysbox-runc init
      1 3986699 3986699    8257 ?             -1 Sl       0   0:01 /usr/bin/containerd-shim-runc-v2 -namespace moby -id 1130aa43e4bd489b7b2c869761564025ef8d2c916859283d401fabf0d1e2c028 -address /run
3986699 3986714 3986699    8257 ?             -1 Sl       0   0:00  \_ /usr/bin/sysbox-runc --root /var/run/docker/runtime-runc/moby --log /run/containerd/io.containerd.runtime.v2.task/moby/1130aa43
3986714 3986767 3986767 3986767 ?             -1 Ds   100000   0:00      \_ /usr/bin/sysbox-runc init
      1 3986821 3986821    8257 ?             -1 Sl       0   0:01 /usr/bin/containerd-shim-runc-v2 -namespace moby -id cc86c620e705498d46091820904c7dc8eafd9ae2fed3d90a4ffd44073a9fa13b -address /run
3986821 3986831 3986821    8257 ?             -1 Sl       0   0:00  \_ /usr/bin/sysbox-runc --root /var/run/docker/runtime-runc/moby --log /run/containerd/io.containerd.runtime.v2.task/moby/cc86c620
3986831 3986859 3986859 3986859 ?             -1 Ds   100000   0:00      \_ /usr/bin/sysbox-runc init

I found that sysbox-fs is blocked on its nsenter child process, which is also in uninterruptible sleep (state D):

      1    8940    8940    8940 ?             -1 Ssl      0 248:07 /usr/bin/sysbox-fs
   8940 3942647 3942647 3942647 ?             -1 Ds   100000   0:00  \_ /usr/bin/sysbox-fs nsenter

That nsenter process is blocked in the kernel, inside an openat() call that is waiting for a reply to a FUSE lookup request:

crash> bt 3942647
PID: 3942647  TASK: ffff91fb91b70000  CPU: 28   COMMAND: "runc:[2:INIT]"
 #0 [ffffa5c9724579b8] __schedule at ffffffff8fcccabf
 #1 [ffffa5c972457a20] schedule at ffffffff8fcccf43
 #2 [ffffa5c972457a38] request_wait_answer at ffffffffc088cef0 [fuse]
 #3 [ffffa5c972457a90] fuse_simple_request at ffffffffc088e45c [fuse]
 #4 [ffffa5c972457ac0] fuse_lookup_name at ffffffffc0890e18 [fuse]
 #5 [ffffa5c972457b98] fuse_lookup at ffffffffc0890fe6 [fuse]
 #6 [ffffa5c972457c68] __lookup_slow at ffffffff8f449cc5
 #7 [ffffa5c972457cc0] walk_component at ffffffff8f44b64b
 #8 [ffffa5c972457cf0] link_path_walk at ffffffff8f44c336
 #9 [ffffa5c972457d50] path_openat at ffffffff8f44ef27
#10 [ffffa5c972457da8] do_filp_open at ffffffff8f451014
#11 [ffffa5c972457ec8] do_sys_openat2 at ffffffff8f437101
#12 [ffffa5c972457f10] __x64_sys_openat at ffffffff8f4375ba
#13 [ffffa5c972457f38] do_syscall_64 at ffffffff8fcc1f5f
#14 [ffffa5c972457f50] entry_SYSCALL_64_after_hwframe at ffffffff8fe00130
    RIP: 000000000040e56e  RSP: 000000c0004fcbf0  RFLAGS: 00000212
    RAX: ffffffffffffffda  RBX: ffffffffffffff9c  RCX: 000000000040e56e
    RDX: 0000000000080000  RSI: 000000c000351400  RDI: ffffffffffffff9c
    RBP: 000000c0004fcc30   R8: 0000000000000000   R9: 0000000000000000
    R10: 0000000000000000  R11: 0000000000000212  R12: 000000c000351400
    R13: 0000000000000100  R14: 000000c000002380  R15: 000000c0004d4a50
    ORIG_RAX: 0000000000000101  CS: 0033  SS: 002b

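Meanwhile, sysbox-fs itself appears to be in the middle of an nsenter request: it is waiting to read the nsenter response while handling an intercepted overlay mount syscall: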

(dlv) bt
 0  0x000000000040e56e in internal/runtime/syscall.Syscall6
    at internal/runtime/syscall/asm_linux_amd64.s:36
 1  0x00000000004dacad in syscall.RawSyscall6
    at syscall/syscall_linux.go:64
 2  0x00000000004dad06 in syscall.Syscall
    at syscall/syscall_linux.go:86
 3  0x00000000004d8f18 in syscall.read
    at syscall/zsyscall_linux_amd64.go:736
 4  0x00000000004fa54e in syscall.Read
    at syscall/syscall_unix.go:183
 5  0x00000000004fa54e in internal/poll.ignoringEINTRIO
    at internal/poll/fd_unix.go:745
 6  0x00000000004fa54e in internal/poll.(*FD).Read
    at internal/poll/fd_unix.go:161
 7  0x000000000050610f in os.(*File).read
    at os/file_posix.go:29
 8  0x000000000050610f in os.(*File).Read
    at os/file.go:124
 9  0x0000000000569968 in encoding/json.(*Decoder).refill
    at encoding/json/stream.go:165
10  0x0000000000569565 in encoding/json.(*Decoder).readValue
    at encoding/json/stream.go:140
11  0x0000000000569315 in encoding/json.(*Decoder).Decode
    at encoding/json/stream.go:63
12  0x000000000092c0ee in github.com/nestybox/sysbox-fs/nsenter.(*NSenterEvent).processResponse
    at github.com/nestybox/sysbox-fs/nsenter/event.go:153
13  0x000000000092f045 in github.com/nestybox/sysbox-fs/nsenter.(*NSenterEvent).SendRequest
    at github.com/nestybox/sysbox-fs/nsenter/event.go:657
14  0x00000000009363c2 in github.com/nestybox/sysbox-fs/nsenter.(*nsenterService).SendRequestEvent
    at github.com/nestybox/sysbox-fs/nsenter/eventService.go:66
15  0x000000000093cef3 in github.com/nestybox/sysbox-fs/seccomp.(*mountSyscallInfo).processOverlayMount
    at github.com/nestybox/sysbox-fs/seccomp/mount.go:474
16  0x000000000093adc5 in github.com/nestybox/sysbox-fs/seccomp.(*mountSyscallInfo).process
    at github.com/nestybox/sysbox-fs/seccomp/mount.go:77
17  0x00000000009496ff in github.com/nestybox/sysbox-fs/seccomp.(*syscallTracer).processMount
    at github.com/nestybox/sysbox-fs/seccomp/tracer.go:663
18  0x0000000000948512 in github.com/nestybox/sysbox-fs/seccomp.(*syscallTracer).processSyscall
    at github.com/nestybox/sysbox-fs/seccomp/tracer.go:502
19  0x00000000009482b0 in github.com/nestybox/sysbox-fs/seccomp.(*syscallTracer).process
    at github.com/nestybox/sysbox-fs/seccomp/tracer.go:467
20  0x00000000009481ef in github.com/nestybox/sysbox-fs/seccomp.(*syscallTracer).connHandler.gowrap1
    at github.com/nestybox/sysbox-fs/seccomp/tracer.go:448
21  0x00000000004824e1 in runtime.goexit
    at runtime/asm_amd64.s:1700
(dlv) p e.ReqMsg
("*github.com/nestybox/sysbox-fs/domain.NSenterMessage")(0xc00178db80)
*github.com/nestybox/sysbox-fs/domain.NSenterMessage {
        Type: "mountSyscallRequest",
        Payload: interface {}(*[]*github.com/nestybox/sysbox-fs/domain.MountSyscallPayload) *[
                *(*"github.com/nestybox/sysbox-fs/domain.MountSyscallPayload")(0xc001dab580),
        ],}
(dlv) p *(*"github.com/nestybox/sysbox-fs/domain.MountSyscallPayload")(0xc001dab580)
github.com/nestybox/sysbox-fs/domain.MountSyscallPayload {
        Header: github.com/nestybox/sysbox-fs/domain.NSenterMsgHeader {
                Pid: 3869668,
                Uid: 100000,
                Gid: 100000,
                Root: "/",
                Cwd: "/",
                Capabilities: [2]uint32 [4294967295,511],},
        Mount: github.com/nestybox/sysbox-fs/domain.Mount {
                Source: "overlay",
                Target: "/var/lib/docker/buildkit/containerd-overlayfs/cachemounts/buildk...+12 more",
                FsType: "overlay",
                Flags: 0,
                Data: "userxattr,index=off,workdir=/var/lib/docker/containerd/daemon/io...+690 more",},}

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions