Skip to content

Commit 633307e

Browse files
committed
Handle large read/write/sendfile to File; return actual bytes written
The return value for the `jl_fs_write/read/sendfile` functions is an `int`, even through they all take `size_t` length arguments, resulting in potentially truncated counts for large file operations. Instead, on success (including partial read/write), we should return the `uv_fs_t.result` field. I have added a test for this but have left it disabled because it allocates a 5 GiB Vector to read into. One consequence of the truncated return values is that we previously mangled large files while copying them with sendfile. If the return value is truncated to a random negative value, it manifests as #30723 or #39868. If it is truncated to a positive value, sendfile will create a very large destination file with thousands of repeated sections of the source file, causing #56537. The bug will only manifest if LLVM decides to zero-extend the return value. This also changes `jl_fs_write` to return the actual number of bytes written, rather than the number of bytes it was asked to write. If a write returns EAGAIN (usually because the file is O_NONBLOCK/O_NODELAY), we never see the actual number of bytes written.
1 parent f513309 commit 633307e

File tree

3 files changed

+53
-14
lines changed

3 files changed

+53
-14
lines changed

base/filesystem.jl

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -219,7 +219,7 @@ function sendfile(dst::File, src::File, src_offset::Int64, bytes::Int)
219219
check_open(dst)
220220
check_open(src)
221221
while true
222-
result = ccall(:jl_fs_sendfile, Int32, (OS_HANDLE, OS_HANDLE, Int64, Csize_t),
222+
result = ccall(:jl_fs_sendfile, Cssize_t, (OS_HANDLE, OS_HANDLE, Int64, Csize_t),
223223
src.handle, dst.handle, src_offset, bytes)
224224
uv_error("sendfile", result)
225225
nsent = result
@@ -232,10 +232,10 @@ end
232232

233233
function unsafe_write(f::File, buf::Ptr{UInt8}, len::UInt, offset::Int64=Int64(-1))
234234
check_open(f)
235-
err = ccall(:jl_fs_write, Int32, (OS_HANDLE, Ptr{UInt8}, Csize_t, Int64),
235+
ret = ccall(:jl_fs_write, Cssize_t, (OS_HANDLE, Ptr{UInt8}, Csize_t, Int64),
236236
f.handle, buf, len, offset)
237-
uv_error("write", err)
238-
return len
237+
uv_error("write", ret)
238+
return ret
239239
end
240240

241241
write(f::File, c::UInt8) = write(f, Ref{UInt8}(c))
@@ -265,7 +265,7 @@ end
265265
function read(f::File, ::Type{UInt8})
266266
check_open(f)
267267
p = Ref{UInt8}()
268-
ret = ccall(:jl_fs_read, Int32, (OS_HANDLE, Ptr{Cvoid}, Csize_t),
268+
ret = ccall(:jl_fs_read, Cssize_t, (OS_HANDLE, Ptr{Cvoid}, Csize_t),
269269
f.handle, p, 1)
270270
uv_error("read", ret)
271271
@assert ret <= sizeof(p) == 1
@@ -298,7 +298,7 @@ read(f::File, ::Type{T}) where {T<:AbstractChar} = T(read(f, Char)) # fallback
298298

299299
function unsafe_read(f::File, p::Ptr{UInt8}, nel::UInt)
300300
check_open(f)
301-
ret = ccall(:jl_fs_read, Int32, (OS_HANDLE, Ptr{Cvoid}, Csize_t),
301+
ret = ccall(:jl_fs_read, Cssize_t, (OS_HANDLE, Ptr{Cvoid}, Csize_t),
302302
f.handle, p, nel)
303303
uv_error("read", ret)
304304
ret == nel || throw(EOFError())
@@ -314,7 +314,7 @@ function readbytes!(f::File, b::MutableDenseArrayType{UInt8}, nb=length(b))
314314
if length(b) < nr
315315
resize!(b, nr)
316316
end
317-
ret = ccall(:jl_fs_read, Int32, (OS_HANDLE, Ptr{Cvoid}, Csize_t),
317+
ret = ccall(:jl_fs_read, Cssize_t, (OS_HANDLE, Ptr{Cvoid}, Csize_t),
318318
f.handle, b, nr)
319319
uv_error("read", ret)
320320
return ret

src/jl_uv.c

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -583,16 +583,16 @@ JL_DLLEXPORT int jl_fs_rename(const char *src_path, const char *dst_path)
583583
return ret;
584584
}
585585

586-
JL_DLLEXPORT int jl_fs_sendfile(uv_os_fd_t src_fd, uv_os_fd_t dst_fd,
587-
int64_t in_offset, size_t len)
586+
JL_DLLEXPORT ssize_t jl_fs_sendfile(uv_os_fd_t src_fd, uv_os_fd_t dst_fd,
587+
int64_t in_offset, size_t len)
588588
{
589589
uv_fs_t req;
590590
JL_SIGATOMIC_BEGIN();
591591
int ret = uv_fs_sendfile(unused_uv_loop_arg, &req, dst_fd, src_fd,
592592
in_offset, len, NULL);
593593
uv_fs_req_cleanup(&req);
594594
JL_SIGATOMIC_END();
595-
return ret;
595+
return req.result ? req.result : ret;
596596
}
597597

598598
JL_DLLEXPORT int jl_fs_hardlink(char *path, char *new_path)
@@ -635,7 +635,15 @@ JL_DLLEXPORT int jl_fs_access(char *path, int mode)
635635
return ret;
636636
}
637637

638-
JL_DLLEXPORT int jl_fs_write(uv_os_fd_t handle, const char *data, size_t len,
638+
/*
639+
* A note about the uv_fs_* functions that return a count of bytes written:
640+
* uv_fs_t.result is a ssize_t, but the function returns int. The result field
641+
* needs to be checked so we can distinguish a real error from a count that
642+
* overflows an int, but we can't use it directly---some error conditions will
643+
* leave it set to 0 but return an error.
644+
*/
645+
646+
JL_DLLEXPORT ssize_t jl_fs_write(uv_os_fd_t handle, const char *data, size_t len,
639647
int64_t offset) JL_NOTSAFEPOINT
640648
{
641649
jl_task_t *ct = jl_get_current_task();
@@ -654,18 +662,18 @@ JL_DLLEXPORT int jl_fs_write(uv_os_fd_t handle, const char *data, size_t len,
654662
jl_io_loop = uv_default_loop();
655663
int ret = uv_fs_write(unused_uv_loop_arg, &req, handle, buf, 1, offset, NULL);
656664
uv_fs_req_cleanup(&req);
657-
return ret;
665+
return req.result ? req.result : ret;
658666
}
659667

660-
JL_DLLEXPORT int jl_fs_read(uv_os_fd_t handle, char *data, size_t len)
668+
JL_DLLEXPORT ssize_t jl_fs_read(uv_os_fd_t handle, char *data, size_t len)
661669
{
662670
uv_fs_t req;
663671
uv_buf_t buf[1];
664672
buf[0].base = data;
665673
buf[0].len = len;
666674
int ret = uv_fs_read(unused_uv_loop_arg, &req, handle, buf, 1, -1, NULL);
667675
uv_fs_req_cleanup(&req);
668-
return ret;
676+
return req.result ? req.result : ret;
669677
}
670678

671679
JL_DLLEXPORT int jl_fs_close(uv_os_fd_t handle)

test/file.jl

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2200,3 +2200,34 @@ end
22002200
end
22012201

22022202
@test Base.infer_return_type(stat, (String,)) == Base.Filesystem.StatStruct
2203+
2204+
# Partial write() to File should return the actual number of bytes written
2205+
@test 1000000 == let p = Pipe()
2206+
# Create a non-blocking pipe that will fill and return EAGAIN
2207+
Base.link_pipe!(p; writer_supports_async=true)
2208+
try
2209+
f = Base.Filesystem.File(Base._fd(p.in))
2210+
@sync begin
2211+
@async begin
2212+
write(f, rand(UInt8, 1000000))
2213+
close(p.in)
2214+
end
2215+
length(read(p))
2216+
end
2217+
finally
2218+
close(p)
2219+
end
2220+
end
2221+
2222+
# Very large read() shouldn't return EOFError because the return value was truncated.
2223+
@test_skip let
2224+
FS = Base.Filesystem
2225+
f = FS.open("/dev/urandom", FS.JL_O_RDONLY)
2226+
try
2227+
v = Vector{UInt8}(undef, 5 * 2^30) # 5 GiB (larger than INT32_MAX)
2228+
read!(f, v)
2229+
true
2230+
finally
2231+
close(f)
2232+
end
2233+
end

0 commit comments

Comments
 (0)