22
33import asyncio
44import base64
5- import contextlib
65import io
76import json
8- import json as jsonlib
97import random
108import re
119import time
12- from collections .abc import Callable
13- from datetime import datetime , timezone
1410from enum import Enum
1511from http import HTTPStatus
1612from typing import TYPE_CHECKING , Any , TypeVar , cast
2016from apify_client .errors import InvalidResponseBodyError
2117
2218if TYPE_CHECKING :
23- from collections .abc import Awaitable
19+ from collections .abc import Awaitable , Callable
2420
2521 from impit import Response
2622
2723 from apify_client .errors import ApifyApiError
2824
29- PARSE_DATE_FIELDS_MAX_DEPTH = 3
30- PARSE_DATE_FIELDS_KEY_SUFFIX = 'At'
31- RECORD_NOT_FOUND_EXCEPTION_TYPES = ['record-not-found' , 'record-or-token-not-found' ]
32-
3325T = TypeVar ('T' )
34- StopRetryingType = Callable [[], None ]
35- ListOrDict = TypeVar ('ListOrDict' , list , dict )
36-
3726
def filter_out_none_values_recursively(
    dictionary: dict,
    *,
    remove_empty_dicts: bool | None = None,
) -> dict:
    """Return a copy of the dictionary with all None values recursively removed.

    Only values that are themselves dictionaries are recursed into; None items
    stored inside lists or other containers are left untouched. The input
    dictionary is not modified.

    Args:
        dictionary: The dictionary to filter.
        remove_empty_dicts: If truthy, dictionaries that end up empty after
            filtering are dropped from their parent as well.

    Returns:
        A new dictionary without None values. Note that when
        `remove_empty_dicts` is truthy and nothing survives the filtering,
        the top-level result itself is None, despite the `dict` annotation.
    """

    def _internal(dictionary: dict, *, remove_empty: bool | None = None) -> dict | None:
        result = {}
        for key, val in dictionary.items():
            if isinstance(val, dict):
                # A nested dict may collapse to None, which the check below then drops.
                val = _internal(val, remove_empty=remove_empty)  # noqa: PLW2901
            if val is not None:
                result[key] = val
        if not result and remove_empty:
            return None
        return result

    return cast('dict', _internal(dictionary, remove_empty=remove_empty_dicts))
12055
12156
def maybe_extract_enum_member_value(maybe_enum_member: Any) -> Any:
    """Extract the value from an Enum member, or return the input unchanged if not an Enum.

    Args:
        maybe_enum_member: Either an `Enum` member or any other object.

    Returns:
        The member's `.value` when given an `Enum` member, otherwise the
        argument itself.
    """
    if isinstance(maybe_enum_member, Enum):
        return maybe_enum_member.value
    return maybe_enum_member
12762
12863
def to_safe_id(id: str) -> str:
    """Convert a resource ID to URL-safe format by replacing `/` with `~`.

    Resource identifiers are either `resource_id` or `username/resource_id`.
    Because `/` has a special meaning in URL paths, every occurrence is
    replaced with `~` before the identifier is placed into a route.

    Args:
        id: The resource identifier (format: `resource_id` or `username/resource_id`).

    Returns:
        The resource identifier with every `/` replaced by `~`.
    """
    return id.replace('/', '~')
13474
13575
def pluck_data(parsed_response: Any) -> dict:
    """Extract the "data" field from an API response.

    Args:
        parsed_response: The parsed API response.

    Returns:
        The value stored under the "data" key. It is returned as-is; `cast`
        only informs the type checker and performs no runtime conversion.

    Raises:
        ValueError: If the response is not a dictionary or has no "data" key.
    """
    if isinstance(parsed_response, dict) and 'data' in parsed_response:
        return cast('dict', parsed_response['data'])

    raise ValueError('The "data" property is missing in the response.')
14192
14293
def pluck_data_as_list(parsed_response: Any) -> list:
    """Extract the "data" field from an API response as a list.

    Args:
        parsed_response: The parsed API response.

    Returns:
        The value stored under the "data" key, typed as a list. The value is
        not converted or validated at runtime; `cast` only informs the type
        checker.

    Raises:
        ValueError: If the response is not a dictionary or has no "data" key.
    """
    if isinstance(parsed_response, dict) and 'data' in parsed_response:
        return cast('list', parsed_response['data'])

    raise ValueError('The "data" property is missing in the response.')
148110
149111
150112def retry_with_exp_backoff (
151- func : Callable [[StopRetryingType , int ], T ],
113+ func : Callable [[Callable [[], None ] , int ], T ],
152114 * ,
153115 max_retries : int = 8 ,
154116 backoff_base_millis : int = 500 ,
155117 backoff_factor : float = 2 ,
156118 random_factor : float = 1 ,
157119) -> T :
120+ """Retry a function with exponential backoff.
121+
122+ Args:
123+ func: Function to retry. Receives a stop_retrying callback and attempt number.
124+ max_retries: Maximum number of retry attempts.
125+ backoff_base_millis: Base backoff delay in milliseconds.
126+ backoff_factor: Exponential backoff multiplier (1-10).
127+ random_factor: Random jitter factor (0-1).
128+
129+ Returns:
130+ The return value of the function.
131+ """
158132 random_factor = min (max (0 , random_factor ), 1 )
159133 backoff_factor = min (max (1 , backoff_factor ), 10 )
160134 swallow = True
@@ -181,13 +155,25 @@ def stop_retrying() -> None:
181155
182156
183157async def retry_with_exp_backoff_async (
184- async_func : Callable [[StopRetryingType , int ], Awaitable [T ]],
158+ async_func : Callable [[Callable [[], None ] , int ], Awaitable [T ]],
185159 * ,
186160 max_retries : int = 8 ,
187161 backoff_base_millis : int = 500 ,
188162 backoff_factor : float = 2 ,
189163 random_factor : float = 1 ,
190164) -> T :
165+ """Retry an async function with exponential backoff.
166+
167+ Args:
168+ async_func: Async function to retry. Receives a stop_retrying callback and attempt number.
169+ max_retries: Maximum number of retry attempts.
170+ backoff_base_millis: Base backoff delay in milliseconds.
171+ backoff_factor: Exponential backoff multiplier (1-10).
172+ random_factor: Random jitter factor (0-1).
173+
174+ Returns:
175+ The return value of the async function.
176+ """
191177 random_factor = min (max (0 , random_factor ), 1 )
192178 backoff_factor = min (max (1 , backoff_factor ), 10 )
193179 swallow = True
@@ -214,14 +200,29 @@ def stop_retrying() -> None:
214200
215201
def catch_not_found_or_throw(exc: ApifyApiError) -> None:
    """Suppress "record not found" API errors and re-raise everything else.

    An error is suppressed only when BOTH conditions hold: its HTTP status is
    404 Not Found and its `type` is one of `record-not-found` or
    `record-or-token-not-found`. A 404 with any other type is re-raised.

    Args:
        exc: The API error to check.

    Raises:
        ApifyApiError: The given `exc`, if it is not a record-not-found 404.
    """
    is_not_found_status = exc.status_code == HTTPStatus.NOT_FOUND
    is_not_found_type = exc.type in ('record-not-found', 'record-or-token-not-found')
    if not (is_not_found_status and is_not_found_type):
        raise exc
221215
222216
223217def encode_webhook_list_to_base64 (webhooks : list [dict ]) -> str :
224- """Encode a list of dictionaries representing webhooks to their base64-encoded representation for the API."""
218+ """Encode a list of webhook dictionaries to base64 for API transmission.
219+
220+ Args:
221+ webhooks: List of webhook dictionaries with keys like "event_types", "request_url", etc.
222+
223+ Returns:
224+ Base64-encoded JSON string.
225+ """
225226 data = []
226227 for webhook in webhooks :
227228 webhook_representation = {
@@ -234,25 +235,49 @@ def encode_webhook_list_to_base64(webhooks: list[dict]) -> str:
234235 webhook_representation ['headersTemplate' ] = webhook ['headers_template' ]
235236 data .append (webhook_representation )
236237
237- return base64 .b64encode (jsonlib .dumps (data ).encode ('utf-8' )).decode ('ascii' )
238+ return base64 .b64encode (json .dumps (data ).encode ('utf-8' )).decode ('ascii' )
238239
239240
def encode_key_value_store_record_value(value: Any, content_type: str | None = None) -> tuple[Any, str]:
    """Encode a value for storage in a key-value store record.

    When no content type is given, it is inferred from the value:
    bytes, bytearray and `io.IOBase` objects become `application/octet-stream`,
    strings become `text/plain; charset=utf-8`, and anything else becomes
    `application/json; charset=utf-8`.

    If the resulting content type contains `application/json` and the value is
    neither binary/file-like nor a string, the value is serialized to
    UTF-8 encoded JSON. Objects the JSON encoder does not support are
    stringified (`default=str`), and NaN/Infinity floats are rejected
    (`allow_nan=False`). All other values are passed through unchanged.

    Args:
        value: The value to encode (can be dict, str, bytes, or file-like object).
        content_type: The content type. If None or empty, it's inferred from the value type.

    Returns:
        A tuple of (encoded_value, content_type).
    """
    # Computed once; both the content-type inference and the JSON step need them.
    is_binary_or_file = isinstance(value, (bytes, bytearray, io.IOBase))
    is_string = isinstance(value, str)

    if not content_type:
        if is_binary_or_file:
            content_type = 'application/octet-stream'
        elif is_string:
            content_type = 'text/plain; charset=utf-8'
        else:
            content_type = 'application/json; charset=utf-8'

    if 'application/json' in content_type and not (is_binary_or_file or is_string):
        value = json.dumps(value, ensure_ascii=False, indent=2, allow_nan=False, default=str).encode('utf-8')

    return (value, content_type)
253267
254268
255269def maybe_parse_response (response : Response ) -> Any :
270+ """Parse an HTTP response based on its content type.
271+
272+ Args:
273+ response: The HTTP response to parse.
274+
275+ Returns:
276+ Parsed response data (JSON dict/list, text string, or raw bytes).
277+
278+ Raises:
279+ InvalidResponseBodyError: If the response body cannot be parsed.
280+ """
256281 if response .status_code == HTTPStatus .NO_CONTENT :
257282 return None
258283
@@ -261,9 +286,11 @@ def maybe_parse_response(response: Response) -> Any:
261286 content_type = response .headers ['content-type' ].split (';' )[0 ].strip ()
262287
263288 try :
264- if is_content_type_json ( content_type ):
289+ if re . search ( r'^application/json' , content_type , flags = re . IGNORECASE ):
265290 response_data = response .json ()
266- elif is_content_type_xml (content_type ) or is_content_type_text (content_type ):
291+ elif re .search (r'^application/.*xml$' , content_type , flags = re .IGNORECASE ) or re .search (
292+ r'^text/' , content_type , flags = re .IGNORECASE
293+ ):
267294 response_data = response .text
268295 else :
269296 response_data = response .content
@@ -274,7 +301,14 @@ def maybe_parse_response(response: Response) -> Any:
274301
275302
276303def is_retryable_error (exc : Exception ) -> bool :
277- """Check if the given error is retryable."""
304+ """Check if an exception should be retried.
305+
306+ Args:
307+ exc: The exception to check.
308+
309+ Returns:
310+ True if the exception is retryable (network errors, timeouts, etc.).
311+ """
278312 return isinstance (
279313 exc ,
280314 (
0 commit comments