From cd19e48a8c868eec4ede3d250e8f5f9ea035247c Mon Sep 17 00:00:00 2001 From: Wei Qi Lu Date: Tue, 24 Mar 2026 13:59:23 -0700 Subject: [PATCH 1/4] python(fix): fall back to application/octet-stream for unknown MIME types --- .../_internal/low_level_wrappers/upload.py | 6 +-- .../low_level_wrappers/test_upload.py | 45 ++++++++++++++++--- 2 files changed, 41 insertions(+), 10 deletions(-) diff --git a/python/lib/sift_client/_internal/low_level_wrappers/upload.py b/python/lib/sift_client/_internal/low_level_wrappers/upload.py index e1c9c6777..e2888f22f 100644 --- a/python/lib/sift_client/_internal/low_level_wrappers/upload.py +++ b/python/lib/sift_client/_internal/low_level_wrappers/upload.py @@ -17,10 +17,6 @@ # Configure logging logger = logging.getLogger(__name__) -# Parquet MIME types are not included in mimetypes by default -mimetypes.add_type("application/vnd.apache.parquet", ".pqt") -mimetypes.add_type("application/vnd.apache.parquet", ".parquet") - class UploadLowLevelClient(LowLevelClientBase, WithRestClient): """Low-level client for file upload operations. @@ -93,7 +89,7 @@ async def upload_attachment( file_name, mimetype, content_encoding = self._mime_and_content_type_from_path(posix_path) if not mimetype: - raise ValueError(f"The MIME-type of '{posix_path}' could not be computed.") + mimetype = "application/octet-stream" # fallback to generic 'binary data' MIME type # Run the synchronous file upload in a thread pool to avoid blocking the event loop loop = asyncio.get_event_loop() diff --git a/python/lib/sift_client/_tests/_internal/low_level_wrappers/test_upload.py b/python/lib/sift_client/_tests/_internal/low_level_wrappers/test_upload.py index d40fd9aec..ec6dbba87 100644 --- a/python/lib/sift_client/_tests/_internal/low_level_wrappers/test_upload.py +++ b/python/lib/sift_client/_tests/_internal/low_level_wrappers/test_upload.py @@ -1,16 +1,51 @@ """Tests for UploadLowLevelClient functionality.""" from pathlib import Path +from unittest.mock import patch + +import pytest from sift_client._internal.low_level_wrappers.upload import UploadLowLevelClient class TestUploadLowLevelClient: class TestMimeAndContentTypeFromPath: - def test_parquet_file_extension(self): + def test_known_mime_type(self): + _, mime, _ = UploadLowLevelClient._mime_and_content_type_from_path(Path("video.mp4")) + assert mime == "video/mp4" + + def test_unknown_extension_returns_none(self): _, mime, _ = UploadLowLevelClient._mime_and_content_type_from_path(Path("data.parquet")) - assert mime == "application/vnd.apache.parquet" + assert mime is None + + def test_no_extension_returns_none(self): + _, mime, _ = UploadLowLevelClient._mime_and_content_type_from_path(Path("README")) + assert mime is None + + def test_file_name_preserved(self): + name, _, _ = UploadLowLevelClient._mime_and_content_type_from_path( + Path("my_file.pcapng") + ) + assert name == "my_file.pcapng" + + class TestUploadAttachmentMimeFallback: + @pytest.mark.asyncio + async def test_unknown_mime_type_falls_back_to_octet_stream(self, tmp_path): + test_file = tmp_path / "data.parquet" + test_file.write_bytes(b"fake parquet data") + + client = UploadLowLevelClient.__new__(UploadLowLevelClient) + + with patch.object( + client, "_upload_file_sync", return_value="remote-file-123" + ) as mock_upload: + result = await client.upload_attachment( + path=test_file, + entity_id="entity_123", + entity_type="runs", + ) - def test_pqt_file_extension(self): - _, mime, _ = UploadLowLevelClient._mime_and_content_type_from_path(Path("data.pqt")) - assert mime == "application/vnd.apache.parquet" + mock_upload.assert_called_once() + _, _, mimetype, *_ = mock_upload.call_args[0] + assert mimetype == "application/octet-stream" + assert result == "remote-file-123" From 8922e917dfdbcce44249f36e2eaddf6206d9c48e Mon Sep 17 00:00:00 2001 From: Wei Qi Lu Date: Tue, 24 Mar 2026 15:11:46 -0700 Subject: [PATCH 2/4] python(refactor): fall back to application/x-{extension} for unknown MIME types isntead of octet-stream --- .../_internal/low_level_wrappers/upload.py | 10 ++- .../low_level_wrappers/test_upload.py | 90 ++++++++++--------- 2 files changed, 58 insertions(+), 42 deletions(-) diff --git a/python/lib/sift_client/_internal/low_level_wrappers/upload.py b/python/lib/sift_client/_internal/low_level_wrappers/upload.py index e2888f22f..75ce0b902 100644 --- a/python/lib/sift_client/_internal/low_level_wrappers/upload.py +++ b/python/lib/sift_client/_internal/low_level_wrappers/upload.py @@ -17,6 +17,10 @@ # Configure logging logger = logging.getLogger(__name__) +# Parquet MIME types are not included in mimetypes by default +mimetypes.add_type("application/vnd.apache.parquet", ".pqt") +mimetypes.add_type("application/vnd.apache.parquet", ".parquet") + class UploadLowLevelClient(LowLevelClientBase, WithRestClient): """Low-level client for file upload operations. @@ -89,7 +93,11 @@ async def upload_attachment( file_name, mimetype, content_encoding = self._mime_and_content_type_from_path(posix_path) if not mimetype: - mimetype = "application/octet-stream" # fallback to generic 'binary data' MIME type + extension = posix_path.suffix + if extension: + mimetype = f"application/x-{extension.removeprefix('.')}" + else: + raise ValueError(f"Cannot determine MIME type for '{path}': file has no extension.") # Run the synchronous file upload in a thread pool to avoid blocking the event loop loop = asyncio.get_event_loop() diff --git a/python/lib/sift_client/_tests/_internal/low_level_wrappers/test_upload.py b/python/lib/sift_client/_tests/_internal/low_level_wrappers/test_upload.py index ec6dbba87..367db1901 100644 --- a/python/lib/sift_client/_tests/_internal/low_level_wrappers/test_upload.py +++ b/python/lib/sift_client/_tests/_internal/low_level_wrappers/test_upload.py @@ -8,44 +8,52 @@ from sift_client._internal.low_level_wrappers.upload import UploadLowLevelClient -class TestUploadLowLevelClient: - class TestMimeAndContentTypeFromPath: - def test_known_mime_type(self): - _, mime, _ = UploadLowLevelClient._mime_and_content_type_from_path(Path("video.mp4")) - assert mime == "video/mp4" - - def test_unknown_extension_returns_none(self): - _, mime, _ = UploadLowLevelClient._mime_and_content_type_from_path(Path("data.parquet")) - assert mime is None - - def test_no_extension_returns_none(self): - _, mime, _ = UploadLowLevelClient._mime_and_content_type_from_path(Path("README")) - assert mime is None - - def test_file_name_preserved(self): - name, _, _ = UploadLowLevelClient._mime_and_content_type_from_path( - Path("my_file.pcapng") - ) - assert name == "my_file.pcapng" - - class TestUploadAttachmentMimeFallback: - @pytest.mark.asyncio - async def test_unknown_mime_type_falls_back_to_octet_stream(self, tmp_path): - test_file = tmp_path / "data.parquet" - test_file.write_bytes(b"fake parquet data") - - client = UploadLowLevelClient.__new__(UploadLowLevelClient) - - with patch.object( - client, "_upload_file_sync", return_value="remote-file-123" - ) as mock_upload: - result = await client.upload_attachment( - path=test_file, - entity_id="entity_123", - entity_type="runs", - ) - - mock_upload.assert_called_once() - _, _, mimetype, *_ = mock_upload.call_args[0] - assert mimetype == "application/octet-stream" - assert result == "remote-file-123" +class TestUploadAttachment: + @pytest.mark.asyncio + async def test_known_mime_type(self, tmp_path): + test_file = tmp_path / "video.mp4" + test_file.write_bytes(b"fake data") + + client = UploadLowLevelClient.__new__(UploadLowLevelClient) + + with patch.object(client, "_upload_file_sync", return_value="remote-file-123") as mock: + await client.upload_attachment(path=test_file, entity_id="e1", entity_type="runs") + + _, _, mimetype, *_ = mock.call_args[0] + assert mimetype == "video/mp4" + + @pytest.mark.asyncio + async def test_unknown_extension_falls_back_to_application_x_ext(self, tmp_path): + test_file = tmp_path / "data.pcapng" + test_file.write_bytes(b"fake data") + + client = UploadLowLevelClient.__new__(UploadLowLevelClient) + + with patch.object(client, "_upload_file_sync", return_value="remote-file-123") as mock: + await client.upload_attachment(path=test_file, entity_id="e1", entity_type="runs") + + _, _, mimetype, *_ = mock.call_args[0] + assert mimetype == "application/x-pcapng" + + @pytest.mark.asyncio + async def test_no_extension_raises_value_error(self, tmp_path): + test_file = tmp_path / "README" + test_file.write_bytes(b"fake data") + + client = UploadLowLevelClient.__new__(UploadLowLevelClient) + + with pytest.raises(ValueError, match="file has no extension"): + await client.upload_attachment(path=test_file, entity_id="e1", entity_type="runs") + + @pytest.mark.asyncio + async def test_file_name_preserved(self, tmp_path): + test_file = tmp_path / "my_file.pcapng" + test_file.write_bytes(b"fake data") + + client = UploadLowLevelClient.__new__(UploadLowLevelClient) + + with patch.object(client, "_upload_file_sync", return_value="remote-file-123") as mock: + await client.upload_attachment(path=test_file, entity_id="e1", entity_type="runs") + + file_name, *_ = mock.call_args[0] + assert file_name == Path(test_file) From 101de372e24836f2d4ca06e358e5a38339f79d03 Mon Sep 17 00:00:00 2001 From: Wei Qi Lu Date: Tue, 24 Mar 2026 15:18:54 -0700 Subject: [PATCH 3/4] python(fix): use lstrip instead of removeprefix for python 3.8 compatibility --- python/lib/sift_client/_internal/low_level_wrappers/upload.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/lib/sift_client/_internal/low_level_wrappers/upload.py b/python/lib/sift_client/_internal/low_level_wrappers/upload.py index 75ce0b902..771ffd659 100644 --- a/python/lib/sift_client/_internal/low_level_wrappers/upload.py +++ b/python/lib/sift_client/_internal/low_level_wrappers/upload.py @@ -95,7 +95,7 @@ async def upload_attachment( if not mimetype: extension = posix_path.suffix if extension: - mimetype = f"application/x-{extension.removeprefix('.')}" + mimetype = f"application/x-{extension.lstrip('.')}" else: raise ValueError(f"Cannot determine MIME type for '{path}': file has no extension.") From 2864488bd9f3afabb7dd39db7f71807d4f2f7ec5 Mon Sep 17 00:00:00 2001 From: Wei Qi Lu Date: Tue, 24 Mar 2026 15:22:34 -0700 Subject: [PATCH 4/4] python(refator): remove ValueError, default to application/octet-stream for missing extensions --- .../lib/sift_client/_internal/low_level_wrappers/upload.py | 2 +- .../_tests/_internal/low_level_wrappers/test_upload.py | 7 +++++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/python/lib/sift_client/_internal/low_level_wrappers/upload.py b/python/lib/sift_client/_internal/low_level_wrappers/upload.py index 771ffd659..4b9e420ef 100644 --- a/python/lib/sift_client/_internal/low_level_wrappers/upload.py +++ b/python/lib/sift_client/_internal/low_level_wrappers/upload.py @@ -97,7 +97,7 @@ async def upload_attachment( if extension: mimetype = f"application/x-{extension.lstrip('.')}" else: - raise ValueError(f"Cannot determine MIME type for '{path}': file has no extension.") + mimetype = "application/octet-stream" # fallback to generic 'binary data' MIME type # Run the synchronous file upload in a thread pool to avoid blocking the event loop loop = asyncio.get_event_loop() diff --git a/python/lib/sift_client/_tests/_internal/low_level_wrappers/test_upload.py b/python/lib/sift_client/_tests/_internal/low_level_wrappers/test_upload.py index 367db1901..c9b511174 100644 --- a/python/lib/sift_client/_tests/_internal/low_level_wrappers/test_upload.py +++ b/python/lib/sift_client/_tests/_internal/low_level_wrappers/test_upload.py @@ -36,15 +36,18 @@ async def test_unknown_extension_falls_back_to_application_x_ext(self, tmp_path) assert mimetype == "application/x-pcapng" @pytest.mark.asyncio - async def test_no_extension_raises_value_error(self, tmp_path): + async def test_no_extension_falls_back_to_octet_stream(self, tmp_path): test_file = tmp_path / "README" test_file.write_bytes(b"fake data") client = UploadLowLevelClient.__new__(UploadLowLevelClient) - with pytest.raises(ValueError, match="file has no extension"): + with patch.object(client, "_upload_file_sync", return_value="remote-file-123") as mock: await client.upload_attachment(path=test_file, entity_id="e1", entity_type="runs") + _, _, mimetype, *_ = mock.call_args[0] + assert mimetype == "application/octet-stream" + @pytest.mark.asyncio async def test_file_name_preserved(self, tmp_path): test_file = tmp_path / "my_file.pcapng"