From 5ea7fe43c35f45ae9e40efb589fb1a841a4d009a Mon Sep 17 00:00:00 2001
From: Akarshan Biswas
Date: Thu, 27 Mar 2025 13:10:08 +0530
Subject: [PATCH 1/6] E2E: fuzzer base create file api tests

---
 .../api/files/test_api_create_file.py | 440 +++++++++++++++++-
 engine/e2e-test/requirements.txt      |   3 +-
 2 files changed, 421 insertions(+), 22 deletions(-)

diff --git a/engine/e2e-test/api/files/test_api_create_file.py b/engine/e2e-test/api/files/test_api_create_file.py
index 03525672d..1d12b157d 100644
--- a/engine/e2e-test/api/files/test_api_create_file.py
+++ b/engine/e2e-test/api/files/test_api_create_file.py
@@ -7,8 +7,24 @@
 from utils.logger import log_response
 from utils.assertion import assert_equal
 import fnmatch
+import io # For creating in-memory files
+import tempfile # For temporary files on disk if needed
+import json # For sending JSON data
+from hypothesis import given, strategies as st, settings, HealthCheck
 
+# --- Constants ---
+BASE_URL = "http://127.0.0.1:3928/v1"
+POST_FILE_URL = f"{BASE_URL}/files"
+EXPECTED_PURPOSE = "assistants"
+EXPECTED_OBJECT = "file"
+REQUEST_TIMEOUT = 15 # Default timeout for requests in seconds
 
+# --- Helper Function ---
+def is_server_error(status_code):
+    """Checks if the status code indicates a server-side error (5xx)."""
+    return 500 <= status_code < 600
+
+# --- Test Class ---
 class TestApiCreateFile:
 
     @pytest.fixture(autouse=True)
@@ -16,30 +32,34 @@ def setup_and_teardown(self):
         # Setup
         success = start_server()
         if not success:
-            raise Exception("Failed to start server")
+            pytest.fail("Failed to start server", pytrace=False)
 
         yield
 
         # Teardown
         stop_server()
-
-    def test_api_create_file_successfully(self):
-        # Define file path
-        file_path_rel = os.path.join("e2e-test", "api", "files", "blank.txt")
-        file_path = os.path.join(os.getcwd(), file_path_rel)
-        log_response(file_path, "test_api_create_file_successfully")
-
-        post_file_url = "http://127.0.0.1:3928/v1/files"
+
+    # ---- Success Case ----
+    def test_api_create_file_successfully(self, tmp_path):
+        """Verify successful file upload with valid parameters."""
+        test_name = "test_api_create_file_successfully"
+        file_path = tmp_path / "blank.txt"
+        file_content = b"This is a test file."
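+        # Persist the payload under pytest's tmp_path fixture so the upload streams from a real on-disk file.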
+        file_path.write_bytes(file_content)
+        log_response(f"Test file path: {file_path}", test_name)
+
         with open(file_path, "rb") as file:
             files = {"file": ("blank.txt", file, "text/plain")}
-            data = {"purpose": "assistants"}
-            response = requests.post(post_file_url, files=files, data=data)
-            log_response(response.text, "test_api_create_file_successfully")
-            log_response(response.status_code, "test_api_create_file_successfully")
+            data = {"purpose": EXPECTED_PURPOSE}
+            response = requests.post(POST_FILE_URL, files=files, data=data, timeout=REQUEST_TIMEOUT)
+
+        log_response(f"Status Code: {response.status_code}", test_name)
+        log_response(f"Response Body: {response.text}", test_name)
 
-        json_data = response.json()
-        log_response(json_data, "test_api_create_file_successfully")
         assert_equal(response.status_code, 200)
+        json_data = response.json()
+
+        log_response(f"JSON Response: {json_data}", test_name)
 
         # Schema to validate
         schema = {
@@ -48,16 +68,394 @@ def test_api_create_file_successfully(self):
                 "bytes": {"type": "integer"},
                 "created_at": {"type": "integer"},
                 "filename": {"type": "string"},
-                "id": {"type": "string"},
-                "object": {"type": "string"},
-                "purpose": {"type": "string"}
+                "id": {"type": "string", "pattern": "^file-"}, # IDs often have prefixes
+                "object": {"type": "string", "enum": [EXPECTED_OBJECT]},
+                "purpose": {"type": "string", "enum": [EXPECTED_PURPOSE]}
             },
             "required": ["bytes", "created_at", "filename", "id", "object", "purpose"]
         }
 
         # Validate response schema
-        jsonschema.validate(instance=json_data, schema=schema)
+        try:
+            jsonschema.validate(instance=json_data, schema=schema)
+        except jsonschema.exceptions.ValidationError as e:
+            pytest.fail(f"Response schema validation failed: {e}", pytrace=False)
 
         # Assert content
-        assert (fnmatch.fnmatch(json_data["filename"], "blank_*.txt") or json_data["filename"] == "blank.txt"), f"Filename {json_data['filename']} does not match pattern blank_*.txt or blank.txt"
-        assert_equal(json_data["purpose"], "assistants")
\ No newline at end of file
+        assert (fnmatch.fnmatch(json_data["filename"], "blank_*.txt") or json_data["filename"] == "blank.txt"), \
+            f"Filename {json_data['filename']} does not match pattern blank_*.txt or blank.txt"
+        assert_equal(json_data["purpose"], EXPECTED_PURPOSE)
+        assert_equal(json_data["bytes"], len(file_content))
+        assert json_data["id"].startswith("file-") # Example: Check ID prefix
+
+
+    # ---- Tests for Missing/Invalid Parts ----
+
+    def test_api_create_file_missing_file_part(self):
+        """Verify API handles requests missing the 'file' multipart field."""
+        test_name = "test_api_create_file_missing_file_part"
+        data = {"purpose": EXPECTED_PURPOSE}
+        # No 'files' argument passed to requests.post
+        response = requests.post(POST_FILE_URL, data=data, timeout=REQUEST_TIMEOUT)
+
+        log_response(f"Status Code: {response.status_code}", test_name)
+        log_response(f"Response Body: {response.text}", test_name)
+
+        # Expecting a client error (e.g., 400 Bad Request) because 'file' is required.
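+        # Any 4xx is accepted below; these tests do not pin the API to one specific error code for a missing multipart field.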
+ assert 400 <= response.status_code < 500, f"Expected 4xx status code, got {response.status_code}" + assert not is_server_error(response.status_code), "Server error occurred" + + def test_api_create_file_missing_purpose_part(self, tmp_path): + """Verify API handles requests missing the 'purpose' multipart field.""" + test_name = "test_api_create_file_missing_purpose_part" + file_path = tmp_path / "missing_purpose.txt" + file_path.write_bytes(b"content") + + with open(file_path, "rb") as file: + files = {"file": (file_path.name, file, "text/plain")} + # No 'data' argument passed to requests.post + response = requests.post(POST_FILE_URL, files=files, timeout=REQUEST_TIMEOUT) + + log_response(f"Status Code: {response.status_code}", test_name) + log_response(f"Response Body: {response.text}", test_name) + + # Expecting a client error (e.g., 400 Bad Request) because 'purpose' is likely required. + assert 400 <= response.status_code < 500, f"Expected 4xx status code, got {response.status_code}" + assert not is_server_error(response.status_code), "Server error occurred" + + def test_api_create_file_empty_content(self): + """Verify API handles uploading an empty file (0 bytes).""" + test_name = "test_api_create_file_empty_content" + file_content = b"" + file_obj = io.BytesIO(file_content) + filename = "empty_file.txt" + + files = {"file": (filename, file_obj, "application/octet-stream")} + data = {"purpose": EXPECTED_PURPOSE} + + response = requests.post(POST_FILE_URL, files=files, data=data, timeout=REQUEST_TIMEOUT) + + log_response(f"Status Code: {response.status_code}", test_name) + log_response(f"Response Body: {response.text}", test_name) + + # Empty files are usually acceptable. Expect 200 OK. + assert_equal(response.status_code, 200) + try: + json_data = response.json() + assert_equal(json_data.get("bytes"), 0) + assert_equal(json_data.get("purpose"), EXPECTED_PURPOSE) + assert_equal(json_data.get("object"), EXPECTED_OBJECT) + assert isinstance(json_data.get("id"), str) + except (requests.exceptions.JSONDecodeError, AssertionError) as e: + pytest.fail(f"Validation failed for empty file upload: {e}\nResponse: {response.text}", pytrace=False) + + def test_api_create_file_empty_purpose_string(self, tmp_path): + """Verify API handles 'purpose' field being an empty string.""" + test_name = "test_api_create_file_empty_purpose_string" + file_path = tmp_path / "empty_purpose_val.txt" + file_path.write_bytes(b"content") + + with open(file_path, "rb") as file: + files = {"file": (file_path.name, file, "text/plain")} + data = {"purpose": ""} # Empty string for purpose + response = requests.post(POST_FILE_URL, files=files, data=data, timeout=REQUEST_TIMEOUT) + + log_response(f"Status Code: {response.status_code}", test_name) + log_response(f"Response Body: {response.text}", test_name) + + # Expecting a client error (400 Bad Request) as "" is not a valid purpose. 
+ assert 400 <= response.status_code < 500, f"Expected 4xx status code for empty purpose, got {response.status_code}" + assert not is_server_error(response.status_code), "Server error occurred" + + # ---- Tests for Incorrect Request Structure ---- + + @pytest.mark.parametrize("method", ["GET", "PUT", "DELETE", "PATCH", "HEAD", "OPTIONS"]) + def test_api_create_file_wrong_method(self, method): + """Verify endpoint rejects incorrect HTTP methods.""" + test_name = f"test_api_create_file_wrong_method_{method}" + log_response(f"Testing method: {method}", test_name) + + try: + response = requests.request(method, POST_FILE_URL, timeout=REQUEST_TIMEOUT) + except requests.exceptions.RequestException as e: + log_response(f"Request failed for method {method}: {e}", test_name) + # Allow connection errors etc, but fail on unexpected successes or server errors + return + + log_response(f"Status Code: {response.status_code}", test_name) + log_response(f"Response Body: {response.text}", test_name) + + # Expect 405 Method Not Allowed. + assert_equal(response.status_code, 405) + assert not is_server_error(response.status_code), f"Server error for method {method}" + + @pytest.mark.parametrize("content_type", [ + "application/json", + "text/plain", + "application/xml", + "application/x-www-form-urlencoded", + None # Test missing Content-Type header + ]) + def test_api_create_file_invalid_content_type(self, content_type): + """Verify API rejects requests with incorrect Content-Type header.""" + test_name = f"test_api_create_file_invalid_content_type_{content_type or 'None'}" + log_response(f"Testing Content-Type: {content_type}", test_name) + + headers = {} + if content_type: + headers["Content-Type"] = content_type + + # Send some dummy data appropriate for the fake content type if needed + data_to_send = '{"purpose": "assistants", "file": "dummy"}' if content_type == "application/json" else "dummy data" + + # Use data= instead of files= since we are not sending multipart + response = requests.post(POST_FILE_URL, headers=headers, data=data_to_send, timeout=REQUEST_TIMEOUT) + + log_response(f"Status Code: {response.status_code}", test_name) + log_response(f"Response Body: {response.text}", test_name) + + # Expect 415 Unsupported Media Type or potentially 400 Bad Request. 
+ assert response.status_code in [400, 415], f"Expected 400 or 415 status code, got {response.status_code}" + assert not is_server_error(response.status_code), "Server error occurred" + + + @pytest.mark.parametrize("path", [ + "/v1/file", # Singular instead of plural + "/v1/files/", # Trailing slash (might be treated differently) + "/v1/files/some-id", # Looks like a specific resource path + "/v2/files", # Incorrect version + "/files" # Missing base path/version + ]) + + def test_api_create_file_incorrect_path(self, path): + """Verify requests to incorrect paths are rejected.""" + test_name = f"test_api_create_file_incorrect_path_{path.replace('/', '_')}" + full_url = f"http://127.0.0.1:3928{path}" # Construct full URL + log_response(f"Testing incorrect path: {full_url}", test_name) + + # Use dummy data/files, the path is the focus + file_obj = io.BytesIO(b"dummy") + files = {"file": ("dummy.txt", file_obj, "application/octet-stream")} + data = {"purpose": EXPECTED_PURPOSE} + + try: + response = requests.post(full_url, files=files, data=data, timeout=REQUEST_TIMEOUT) + except requests.exceptions.RequestException as e: + # Connection errors are possible if base path is totally wrong + log_response(f"Request failed for path {path}: {e}", test_name) + return # Don't assert on connection errors + + log_response(f"Status Code: {response.status_code}", test_name) + log_response(f"Response Body: {response.text}", test_name) + + # Expect 404 Not Found. + assert_equal(response.status_code, 404) + assert not is_server_error(response.status_code), f"Server error for path {path}" + + # ---- Test for Large File (can be slow) ---- + # Mark this test as 'slow' using pytest markers if needed: + # @pytest.mark.slow + # You can run slow tests with `pytest -m slow` and skip them with `pytest -m "not slow"` + @pytest.mark.skip(reason="Test requires significant resources/time, enable manually if needed") + def test_api_create_file_very_large_file(self, tmp_path): + """Verify API handles very large files (e.g., > 100MB). SKIPPED by default.""" + test_name = "test_api_create_file_very_large_file" + # Define large size (e.g., 100 MB) + large_size_bytes = 100 * 1024 * 1024 + large_file_path = tmp_path / "large_file.bin" + + log_response(f"Creating large file ({large_size_bytes / (1024*1024):.1f} MB)...", test_name) + try: + with open(large_file_path, "wb") as f: + # Write in chunks to manage memory, though seek is faster for sparse files + # For simplicity here, just seek and write a byte at the end + # Note: This creates a sparse file on filesystems that support it (like ext4, NTFS) + # which takes up little actual disk space but reports the large size. + # If the server reads the whole declared size, this test is still valid. + # If you need actual dense data, write in chunks. 
+ f.seek(large_size_bytes - 1) + f.write(b"\0") + log_response(f"Large file created: {large_file_path}", test_name) + + file_size_on_disk = large_file_path.stat().st_size + assert file_size_on_disk == large_size_bytes, f"Created file size mismatch: {file_size_on_disk}" + + with open(large_file_path, "rb") as file: + files = {"file": (large_file_path.name, file, "application/octet-stream")} + data = {"purpose": EXPECTED_PURPOSE} + # Increase timeout significantly for large uploads + large_file_timeout = 300 # 5 minutes + + log_response(f"Attempting upload...", test_name) + response = requests.post(POST_FILE_URL, files=files, data=data, timeout=large_file_timeout) + + log_response(f"Status Code: {response.status_code}", test_name) + # Avoid logging potentially huge response body + log_response(f"Response Length: {len(response.text)}", test_name) + + + assert not is_server_error(response.status_code), "Server error occurred processing large file" + + if response.status_code == 200: + log_response("Large file uploaded successfully.", test_name) + json_data = response.json() + assert_equal(json_data.get("bytes"), large_size_bytes) + assert_equal(json_data.get("purpose"), EXPECTED_PURPOSE) + elif response.status_code == 413: + log_response("Server rejected large file (413 Payload Too Large) - This may be expected.", test_name) + # This is an acceptable outcome if the server has limits. + else: + pytest.fail(f"Unexpected status code {response.status_code} for large file upload.", pytrace=False) + + except MemoryError: + pytest.skip("Skipping large file test due to insufficient memory.") + except requests.exceptions.Timeout: + pytest.fail("Request timed out during large file upload.", pytrace=False) + except requests.exceptions.RequestException as e: + pytest.fail(f"Request failed during large file upload: {e}", pytrace=False) + finally: + # Clean up the large file if it was created + if large_file_path.exists(): + try: + large_file_path.unlink() + except OSError as e: + log_response(f"Warning: Failed to delete large temp file {large_file_path}: {e}", test_name) + + + # ----- Fuzzing Tests using Hypothesis (from previous response) ----- + + HYPOTHESIS_SETTINGS = settings( + deadline=None, # Allow more time for network requests + suppress_health_check=[HealthCheck.too_slow, HealthCheck.data_too_large], + max_examples=50 # Adjust as needed + ) + + fuzzy_filenames = st.text( + alphabet=st.characters(min_codepoint=1, max_codepoint=0xFFFF, blacklist_characters='\\/\0'), + min_size=1, max_size=255 + ) + + @HYPOTHESIS_SETTINGS + @given(filename=fuzzy_filenames) + def test_fuzz_filename(self, filename): + """Tests uploading a file with various generated filenames.""" + test_name = "test_fuzz_filename" + log_response(f"Fuzzing with filename: {filename!r}", test_name) + file_obj = io.BytesIO(b"Fuzz test content") + files = {"file": (filename, file_obj, "application/octet-stream")} + data = {"purpose": EXPECTED_PURPOSE} + + try: + response = requests.post(POST_FILE_URL, files=files, data=data, timeout=REQUEST_TIMEOUT) + log_response(f"Status: {response.status_code}", test_name) + log_response(f"Response head: {response.text[:100]}...", test_name) + except requests.exceptions.RequestException as e: + log_response(f"Request failed: {e}", test_name) + return + + assert not is_server_error(response.status_code), \ + f"Server error ({response.status_code}) for filename: {filename!r}" + # Optional stricter check: Allow only 200 or 400 (if filename validation exists) + # assert response.status_code == 200 or 
response.status_code == 400, \ + # f"Unexpected status {response.status_code} for filename: {filename!r}" + + fuzzy_purposes = st.one_of( + st.none(), st.booleans(), st.integers(), + st.floats(allow_nan=False, allow_infinity=False), + st.text(max_size=1024), st.binary(max_size=1024) + ) + + @HYPOTHESIS_SETTINGS + @given(purpose=fuzzy_purposes) + def test_fuzz_purpose(self, purpose): + """Tests uploading a file with various generated 'purpose' values.""" + test_name = "test_fuzz_purpose" + log_response(f"Fuzzing with purpose: {purpose!r}", test_name) + file_obj = io.BytesIO(b"Purpose fuzz test") + files = {"file": ("purpose_test.txt", file_obj, "text/plain")} + if isinstance(purpose, bytes): + data = {"purpose": purpose.decode('utf-8', errors='replace')} + else: + data = {"purpose": str(purpose)} + + try: + response = requests.post(POST_FILE_URL, files=files, data=data, timeout=REQUEST_TIMEOUT) + log_response(f"Status: {response.status_code}", test_name) + log_response(f"Response head: {response.text[:100]}...", test_name) + except requests.exceptions.RequestException as e: + log_response(f"Request failed: {e}", test_name) + return + + assert not is_server_error(response.status_code), \ + f"Server error ({response.status_code}) for purpose: {purpose!r}" + + if str(purpose) == EXPECTED_PURPOSE: + assert response.status_code == 200, \ + f"Expected 200 for valid purpose '{EXPECTED_PURPOSE}', got {response.status_code}" + elif response.status_code == 200: + log_response(f"WARNING: Received 200 OK for unexpected purpose: {purpose!r}", test_name) + else: # Expecting 4xx + assert 400 <= response.status_code < 500, \ + f"Expected client error (4xx) for invalid purpose {purpose!r}, got {response.status_code}" + + fuzzy_content = st.binary(max_size=2048) + + @HYPOTHESIS_SETTINGS + @given(content=fuzzy_content) + def test_fuzz_file_content(self, content): + """Tests uploading files with various binary content.""" + test_name = "test_fuzz_file_content" + log_response(f"Fuzzing with file content length: {len(content)} bytes", test_name) + file_obj = io.BytesIO(content) + files = {"file": ("content_test.bin", file_obj, "application/octet-stream")} + data = {"purpose": EXPECTED_PURPOSE} + + try: + response = requests.post(POST_FILE_URL, files=files, data=data, timeout=REQUEST_TIMEOUT + 5) # Slightly longer timeout + log_response(f"Status: {response.status_code}", test_name) + log_response(f"Response head: {response.text[:100]}...", test_name) + except requests.exceptions.RequestException as e: + log_response(f"Request failed: {e}", test_name) + return + + assert not is_server_error(response.status_code), \ + f"Server error ({response.status_code}) for content length: {len(content)}" + + if response.status_code == 200: + try: + json_data = response.json() + assert_equal(json_data.get("bytes"), len(content)) + assert_equal(json_data.get("purpose"), EXPECTED_PURPOSE) + except (requests.exceptions.JSONDecodeError, AssertionError, TypeError) as e: + pytest.fail(f"Validation failed for fuzz content (len {len(content)}): {e}\nResponse: {response.text}", pytrace=False) + + + @HYPOTHESIS_SETTINGS + @given( + filename=fuzzy_filenames, + purpose_val=st.text(max_size=100), + content=st.binary(max_size=512) + ) + def test_fuzz_combined(self, filename, purpose_val, content): + """Tests uploading files with combined variations of filename, purpose, and content.""" + test_name = "test_fuzz_combined" + log_response(f"Fuzzing combined: fn={filename!r}, p={purpose_val!r}, len={len(content)}", test_name) + file_obj = 
io.BytesIO(content) + files = {"file": (filename, file_obj, "application/octet-stream")} + data = {"purpose": purpose_val} + + try: + response = requests.post(POST_FILE_URL, files=files, data=data, timeout=REQUEST_TIMEOUT + 5) + log_response(f"Status: {response.status_code}", test_name) + except requests.exceptions.RequestException as e: + log_response(f"Request failed: {e}", test_name) + return + + assert not is_server_error(response.status_code), \ + f"Server error ({response.status_code}) for combined input: fn={filename!r}, p={purpose_val!r}, len={len(content)}" + + # Further checks depend heavily on API logic (is purpose_val always invalid unless 'assistants'?) + if purpose_val == EXPECTED_PURPOSE and response.status_code != 200: + log_response(f"WARNING: Expected 200 for valid purpose but got {response.status_code} in combined test.", test_name) + elif purpose_val != EXPECTED_PURPOSE and not (400 <= response.status_code < 500): + log_response(f"WARNING: Expected 4xx for invalid purpose '{purpose_val}' but got {response.status_code} in combined test.", test_name) diff --git a/engine/e2e-test/requirements.txt b/engine/e2e-test/requirements.txt index 6991b03d3..0b6e1fdf4 100644 --- a/engine/e2e-test/requirements.txt +++ b/engine/e2e-test/requirements.txt @@ -4,4 +4,5 @@ pytest-asyncio requests pyyaml jsonschema -tenacity \ No newline at end of file +tenacity +hypothesis From ac7b6c8201d77b8f06b2af1bfbd0f8d04f47df1c Mon Sep 17 00:00:00 2001 From: Akarshan Biswas Date: Thu, 27 Mar 2025 13:26:11 +0530 Subject: [PATCH 2/6] Remove wrong method check --- .../api/files/test_api_create_file.py | 22 ------------------- 1 file changed, 22 deletions(-) diff --git a/engine/e2e-test/api/files/test_api_create_file.py b/engine/e2e-test/api/files/test_api_create_file.py index 1d12b157d..438782a03 100644 --- a/engine/e2e-test/api/files/test_api_create_file.py +++ b/engine/e2e-test/api/files/test_api_create_file.py @@ -169,25 +169,6 @@ def test_api_create_file_empty_purpose_string(self, tmp_path): # ---- Tests for Incorrect Request Structure ---- - @pytest.mark.parametrize("method", ["GET", "PUT", "DELETE", "PATCH", "HEAD", "OPTIONS"]) - def test_api_create_file_wrong_method(self, method): - """Verify endpoint rejects incorrect HTTP methods.""" - test_name = f"test_api_create_file_wrong_method_{method}" - log_response(f"Testing method: {method}", test_name) - - try: - response = requests.request(method, POST_FILE_URL, timeout=REQUEST_TIMEOUT) - except requests.exceptions.RequestException as e: - log_response(f"Request failed for method {method}: {e}", test_name) - # Allow connection errors etc, but fail on unexpected successes or server errors - return - - log_response(f"Status Code: {response.status_code}", test_name) - log_response(f"Response Body: {response.text}", test_name) - - # Expect 405 Method Not Allowed. 
- assert_equal(response.status_code, 405) - assert not is_server_error(response.status_code), f"Server error for method {method}" @pytest.mark.parametrize("content_type", [ "application/json", @@ -355,9 +336,6 @@ def test_fuzz_filename(self, filename): assert not is_server_error(response.status_code), \ f"Server error ({response.status_code}) for filename: {filename!r}" - # Optional stricter check: Allow only 200 or 400 (if filename validation exists) - # assert response.status_code == 200 or response.status_code == 400, \ - # f"Unexpected status {response.status_code} for filename: {filename!r}" fuzzy_purposes = st.one_of( st.none(), st.booleans(), st.integers(), From 861a325513743b948b27cae9676876d213812961 Mon Sep 17 00:00:00 2001 From: Akarshan Biswas Date: Thu, 27 Mar 2025 13:47:06 +0530 Subject: [PATCH 3/6] Prevent hypothesis from creating a lone Unicode surrogate character for fuzz file --- engine/e2e-test/api/files/test_api_create_file.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/engine/e2e-test/api/files/test_api_create_file.py b/engine/e2e-test/api/files/test_api_create_file.py index 438782a03..6eddc55bf 100644 --- a/engine/e2e-test/api/files/test_api_create_file.py +++ b/engine/e2e-test/api/files/test_api_create_file.py @@ -312,8 +312,18 @@ def test_api_create_file_very_large_file(self, tmp_path): ) fuzzy_filenames = st.text( - alphabet=st.characters(min_codepoint=1, max_codepoint=0xFFFF, blacklist_characters='\\/\0'), - min_size=1, max_size=255 + alphabet=st.characters( + min_codepoint=1, + max_codepoint=0x10FFFF, # New: Full range including Supplementary Planes + + blacklist_categories=( + 'Cs', + 'Cc', + ), + blacklist_characters='\\/\0' + ), + min_size=1, + max_size=255 # Common filesystem limit ) @HYPOTHESIS_SETTINGS From c027d5aa16b67e066d0725abb5205fa40cd3f91c Mon Sep 17 00:00:00 2001 From: Akarshan Biswas Date: Mon, 31 Mar 2025 18:49:00 +0530 Subject: [PATCH 4/6] Add path traversal security tests --- .../api/files/test_api_create_file.py | 63 +++++++++++++++++-- 1 file changed, 57 insertions(+), 6 deletions(-) diff --git a/engine/e2e-test/api/files/test_api_create_file.py b/engine/e2e-test/api/files/test_api_create_file.py index 6eddc55bf..9ffcfdd1d 100644 --- a/engine/e2e-test/api/files/test_api_create_file.py +++ b/engine/e2e-test/api/files/test_api_create_file.py @@ -236,7 +236,6 @@ def test_api_create_file_incorrect_path(self, path): # ---- Test for Large File (can be slow) ---- # Mark this test as 'slow' using pytest markers if needed: # @pytest.mark.slow - # You can run slow tests with `pytest -m slow` and skip them with `pytest -m "not slow"` @pytest.mark.skip(reason="Test requires significant resources/time, enable manually if needed") def test_api_create_file_very_large_file(self, tmp_path): """Verify API handles very large files (e.g., > 100MB). SKIPPED by default.""" @@ -253,7 +252,6 @@ def test_api_create_file_very_large_file(self, tmp_path): # Note: This creates a sparse file on filesystems that support it (like ext4, NTFS) # which takes up little actual disk space but reports the large size. # If the server reads the whole declared size, this test is still valid. - # If you need actual dense data, write in chunks. 
f.seek(large_size_bytes - 1) f.write(b"\0") log_response(f"Large file created: {large_file_path}", test_name) @@ -302,6 +300,43 @@ def test_api_create_file_very_large_file(self, tmp_path): except OSError as e: log_response(f"Warning: Failed to delete large temp file {large_file_path}: {e}", test_name) + # ----- Security Tests ----- + + @pytest.mark.parametrize("malicious_filename", [ + "../sensitive.conf", + "test/../../etc/passwd", + "..\\windows\\system32\\config", # Windows style added for coverage + "....//tricky.txt", + "file/name/with/../in/middle.txt", + "/absolute/path/../file.txt", + "nul../file.txt", + "file.txt..", + "..file.txt" + ]) + def test_api_create_file_path_traversal_filename(self, malicious_filename): + """Verify API rejects filenames attempting path traversal using '..'.""" + test_name = f"test_api_create_file_path_traversal_filename_{malicious_filename.replace('/', '_').replace('\\', '_').replace('.', '_')}" + log_response(f"Testing potentially malicious filename: {malicious_filename!r}", test_name) + + file_content = b"Path traversal attempt" + file_obj = io.BytesIO(file_content) + + files = {"file": (malicious_filename, file_obj, "application/octet-stream")} + data = {"purpose": EXPECTED_PURPOSE} + + try: + response = requests.post(POST_FILE_URL, files=files, data=data, timeout=REQUEST_TIMEOUT) + except requests.exceptions.RequestException as e: + log_response(f"Request failed client-side for filename {malicious_filename!r}: {e}", test_name) + pytest.fail(f"Request failed before reaching server for filename {malicious_filename!r}", pytrace=False) + + log_response(f"Status Code: {response.status_code}", test_name) + log_response(f"Response Body (first 500 chars): {response.text[:500]}...", test_name) + + assert 400 <= response.status_code < 500, \ + f"Expected 4xx status code for malicious filename '{malicious_filename}', got {response.status_code}" + assert not is_server_error(response.status_code), f"Server error occurred for filename {malicious_filename}" + # ----- Fuzzing Tests using Hypothesis (from previous response) ----- @@ -320,7 +355,7 @@ def test_api_create_file_very_large_file(self, tmp_path): 'Cs', 'Cc', ), - blacklist_characters='\\/\0' + blacklist_characters='\\/\0' # Blacklist slashes, backslashes, null bytes ), min_size=1, max_size=255 # Common filesystem limit @@ -344,9 +379,15 @@ def test_fuzz_filename(self, filename): log_response(f"Request failed: {e}", test_name) return + # This fuzz test checks broader filename handling, not just path traversal which is tested above. + # A strict check might be: assert response.status_code == 200 + # However, some generated filenames might be legitimately invalid (e.g., too long if server enforces < 255, specific chars). + # Therefore, only check for server errors which indicate unexpected crashes. + # Client errors (4xx) might be acceptable for certain fuzz inputs. 
assert not is_server_error(response.status_code), \ f"Server error ({response.status_code}) for filename: {filename!r}" + fuzzy_purposes = st.one_of( st.none(), st.booleans(), st.integers(), st.floats(allow_nan=False, allow_infinity=False), @@ -362,8 +403,10 @@ def test_fuzz_purpose(self, purpose): file_obj = io.BytesIO(b"Purpose fuzz test") files = {"file": ("purpose_test.txt", file_obj, "text/plain")} if isinstance(purpose, bytes): + # Try decoding bytes, replace errors if needed for data field data = {"purpose": purpose.decode('utf-8', errors='replace')} else: + # Convert other types to string as multipart typically sends strings data = {"purpose": str(purpose)} try: @@ -377,12 +420,14 @@ def test_fuzz_purpose(self, purpose): assert not is_server_error(response.status_code), \ f"Server error ({response.status_code}) for purpose: {purpose!r}" + # Check logic based on whether the fuzz input matches the expected valid purpose if str(purpose) == EXPECTED_PURPOSE: assert response.status_code == 200, \ f"Expected 200 for valid purpose '{EXPECTED_PURPOSE}', got {response.status_code}" elif response.status_code == 200: + # This might indicate the server is too lenient with 'purpose' validation log_response(f"WARNING: Received 200 OK for unexpected purpose: {purpose!r}", test_name) - else: # Expecting 4xx + else: # Expecting 4xx for invalid purposes assert 400 <= response.status_code < 500, \ f"Expected client error (4xx) for invalid purpose {purpose!r}, got {response.status_code}" @@ -442,8 +487,14 @@ def test_fuzz_combined(self, filename, purpose_val, content): assert not is_server_error(response.status_code), \ f"Server error ({response.status_code}) for combined input: fn={filename!r}, p={purpose_val!r}, len={len(content)}" - # Further checks depend heavily on API logic (is purpose_val always invalid unless 'assistants'?) + # Basic logic checks, might need refinement based on specific API rules if purpose_val == EXPECTED_PURPOSE and response.status_code != 200: log_response(f"WARNING: Expected 200 for valid purpose but got {response.status_code} in combined test.", test_name) elif purpose_val != EXPECTED_PURPOSE and not (400 <= response.status_code < 500): - log_response(f"WARNING: Expected 4xx for invalid purpose '{purpose_val}' but got {response.status_code} in combined test.", test_name) + # If purpose is invalid, expect 4xx. If we get 200 or 5xx, log a warning/potentially fail. + # Allow 200 if *maybe* the filename or content caused a specific override? Less likely. + # Focus on avoiding 5xx primarily. 
+            if response.status_code == 200:
+                log_response(f"WARNING: Received 200 OK for invalid purpose '{purpose_val!r}' in combined test.", test_name)
+            else: # Not 200, not 4xx, not 5xx (already asserted above) -> Unexpected status code
+                log_response(f"INFO: Unexpected status {response.status_code} for invalid purpose '{purpose_val!r}' in combined test.", test_name)

From 17a6fbaca839a15e81ac74905435020ba5cfbf06 Mon Sep 17 00:00:00 2001
From: Akarshan Biswas
Date: Mon, 31 Mar 2025 18:56:17 +0530
Subject: [PATCH 5/6] Perform replacements outside the f-string to avoid SyntaxError

---
 engine/e2e-test/api/files/test_api_create_file.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/engine/e2e-test/api/files/test_api_create_file.py b/engine/e2e-test/api/files/test_api_create_file.py
index 9ffcfdd1d..782880bd1 100644
--- a/engine/e2e-test/api/files/test_api_create_file.py
+++ b/engine/e2e-test/api/files/test_api_create_file.py
@@ -315,7 +315,8 @@ def test_api_create_file_very_large_file(self, tmp_path):
     ])
     def test_api_create_file_path_traversal_filename(self, malicious_filename):
         """Verify API rejects filenames attempting path traversal using '..'."""
-        test_name = f"test_api_create_file_path_traversal_filename_{malicious_filename.replace('/', '_').replace('\\', '_').replace('.', '_')}"
+        sanitized_part = malicious_filename.replace('/', '_').replace('\\', '_').replace('.', '_')
+        test_name = f"test_api_create_file_path_traversal_filename_{sanitized_part}"
         log_response(f"Testing potentially malicious filename: {malicious_filename!r}", test_name)
 
         file_content = b"Path traversal attempt"

From c209d62e19885d36b14dbb3ba60e7759b1a78e8c Mon Sep 17 00:00:00 2001
From: Akarshan Biswas
Date: Mon, 31 Mar 2025 19:19:06 +0530
Subject: [PATCH 6/6] Fix path traversal tests

---
 .../api/files/test_api_create_file.py | 30 ++++++++-----------
 1 file changed, 13 insertions(+), 17 deletions(-)

diff --git a/engine/e2e-test/api/files/test_api_create_file.py b/engine/e2e-test/api/files/test_api_create_file.py
index 782880bd1..f499a69e8 100644
--- a/engine/e2e-test/api/files/test_api_create_file.py
+++ b/engine/e2e-test/api/files/test_api_create_file.py
@@ -26,6 +26,17 @@ def is_server_error(status_code):
 
 # --- Test Class ---
 class TestApiCreateFile:
+    base_malicious_filenames = [
+        "../sensitive.conf",
+        "test/../../../tmp/passwd",
+        "....//tricky.txt",
+        "file/name/with/../in/middle.txt",
+        "/absolute/path/../file.txt",
+    ]
+    test_filenames = list(base_malicious_filenames)
+    if platform.system() == "Windows":
+        test_filenames.append("..\\windows\\system32\\config")
+
 
     @pytest.fixture(autouse=True)
     def setup_and_teardown(self):
@@ -301,18 +312,7 @@ def test_api_create_file_very_large_file(self, tmp_path):
                     log_response(f"Warning: Failed to delete large temp file {large_file_path}: {e}", test_name)
 
     # ----- Security Tests -----
-
-    @pytest.mark.parametrize("malicious_filename", [
-        "../sensitive.conf",
-        "test/../../etc/passwd",
-        "..\\windows\\system32\\config", # Windows style added for coverage
-        "....//tricky.txt",
-        "file/name/with/../in/middle.txt",
-        "/absolute/path/../file.txt",
-        "nul../file.txt",
-        "file.txt..",
-        "..file.txt"
-    ])
+    @pytest.mark.parametrize("malicious_filename", test_filenames)
     def test_api_create_file_path_traversal_filename(self, malicious_filename):
         """Verify API rejects filenames attempting path traversal using '..'."""
         sanitized_part = malicious_filename.replace('/', '_').replace('\\', '_').replace('.', '_')
         test_name = f"test_api_create_file_path_traversal_filename_{sanitized_part}"
         log_response(f"Testing potentially malicious filename: {malicious_filename!r}", test_name)
 
         file_content = b"Path traversal attempt"
         file_obj = io.BytesIO(file_content)
 
         files = {"file": (malicious_filename, file_obj, "application/octet-stream")}
         data = {"purpose": EXPECTED_PURPOSE}
 
-        try:
-            response = requests.post(POST_FILE_URL, files=files, data=data, timeout=REQUEST_TIMEOUT)
-        except requests.exceptions.RequestException as e:
-            log_response(f"Request failed client-side for filename {malicious_filename!r}: {e}", test_name)
-            pytest.fail(f"Request failed before reaching server for filename {malicious_filename!r}", pytrace=False)
+        response = requests.post(POST_FILE_URL, files=files, data=data, timeout=REQUEST_TIMEOUT)
 
         log_response(f"Status Code: {response.status_code}", test_name)
         log_response(f"Response Body (first 500 chars): {response.text[:500]}...", test_name)
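
A note for anyone tuning these fuzz tests locally: the Hypothesis budget is pinned by the HYPOTHESIS_SETTINGS object introduced in PATCH 1 (max_examples=50). A common way to vary that per environment is a Hypothesis settings profile; the sketch below is illustrative only (the profile names and the HYPOTHESIS_PROFILE environment variable are assumptions, not something these patches define) and would sit in the test module or a conftest.py.

import os
from hypothesis import settings

# Hypothetical profiles; "default" is Hypothesis' built-in baseline profile.
settings.register_profile("smoke", max_examples=10, deadline=None)     # quick local runs
settings.register_profile("nightly", max_examples=500, deadline=None)  # deeper fuzzing runs
settings.load_profile(os.getenv("HYPOTHESIS_PROFILE", "default"))

Because HYPOTHESIS_SETTINGS sets max_examples explicitly, a loaded profile's value would generally only take effect if that keyword were dropped from the decorator settings.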