-from fastapi import APIRouter, Depends, Request, Response
+import asyncio

+from fastapi import APIRouter, Depends, HTTPException, Request, Response
+
+from litellm._logging import verbose_proxy_logger
 from litellm.proxy._types import *
 from litellm.proxy.auth.user_api_key_auth import UserAPIKeyAuth, user_api_key_auth
 from litellm.proxy.common_request_processing import ProxyBaseLLMRequestProcessing
+from litellm.types.responses.main import DeleteResponseResult

 router = APIRouter()

@@ -30,22 +34,40 @@ async def responses_api(
     """
     Follows the OpenAI Responses API spec: https://platform.openai.com/docs/api-reference/responses

+    Supports background mode with polling_via_cache for partial response retrieval.
+    When background=true and polling_via_cache is enabled, this endpoint returns a
+    polling_id immediately and streams the response in the background, updating the
+    Redis cache as output accumulates.
+
     ```bash
+    # Normal request
     curl -X POST http://localhost:4000/v1/responses \
     -H "Content-Type: application/json" \
     -H "Authorization: Bearer sk-1234" \
     -d '{
        "model": "gpt-4o",
        "input": "Tell me about AI"
     }'
+
+    # Background request with polling
+    curl -X POST http://localhost:4000/v1/responses \
+    -H "Content-Type: application/json" \
+    -H "Authorization: Bearer sk-1234" \
+    -d '{
+       "model": "gpt-4o",
+       "input": "Tell me about AI",
+       "background": true
+    }'
     ```
     """
     from litellm.proxy.proxy_server import (
         _read_request_body,
         general_settings,
         llm_router,
+        polling_cache_ttl,
+        polling_via_cache_enabled,
         proxy_config,
         proxy_logging_obj,
+        redis_usage_cache,
         select_data_generator,
         user_api_base,
         user_max_tokens,
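Editor's note: from the client's side, the background-plus-polling contract the docstring describes amounts to "submit once, poll the returned ID until it settles." A minimal sketch against the endpoints above; the `id`, `status`, and `output` field names follow the OpenAI Response object, and the assumption that the cached state reports `queued`/`in_progress` while streaming is mine, not the diff's.

```python
import time

import requests

BASE_URL = "http://localhost:4000"
HEADERS = {"Authorization": "Bearer sk-1234", "Content-Type": "application/json"}

# Submit a background request; the proxy returns the initial Response
# object immediately, carrying the polling ID.
resp = requests.post(
    f"{BASE_URL}/v1/responses",
    headers=HEADERS,
    json={"model": "gpt-4o", "input": "Tell me about AI", "background": True},
)
polling_id = resp.json()["id"]

# Poll GET /v1/responses/{id} until the background stream finishes.
state = {}
while True:
    state = requests.get(
        f"{BASE_URL}/v1/responses/{polling_id}", headers=HEADERS
    ).json()
    if state.get("status") not in ("queued", "in_progress"):
        break
    time.sleep(1)

print(state.get("status"), state.get("output"))
```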
@@ -56,6 +78,74 @@ async def responses_api(
     )

     data = await _read_request_body(request=request)
+
+    # Check if polling via cache should be used for this request
+    from litellm.proxy.response_polling.polling_handler import should_use_polling_for_request
+
+    should_use_polling = should_use_polling_for_request(
+        background_mode=data.get("background", False),
+        polling_via_cache_enabled=polling_via_cache_enabled,
+        redis_cache=redis_usage_cache,
+        model=data.get("model", ""),
+        llm_router=llm_router,
+    )
+
+    # If polling is enabled, use polling mode
+    if should_use_polling:
+        from litellm.proxy.response_polling.polling_handler import (
+            ResponsePollingHandler,
+        )
+        from litellm.proxy.response_polling.background_streaming import (
+            background_streaming_task,
+        )
+
+        verbose_proxy_logger.info(
+            f"Starting background response with polling for model={data.get('model')}"
+        )
+
+        # Initialize polling handler with configured TTL (from global config)
+        polling_handler = ResponsePollingHandler(
+            redis_cache=redis_usage_cache,
+            ttl=polling_cache_ttl,  # global var set at startup
+        )
+
+        # Generate polling ID
+        polling_id = ResponsePollingHandler.generate_polling_id()
+
+        # Create initial state in Redis
+        initial_state = await polling_handler.create_initial_state(
+            polling_id=polling_id,
+            request_data=data,
+        )
+
+        # Start background task to stream and update cache
+        asyncio.create_task(
+            background_streaming_task(
+                polling_id=polling_id,
+                data=data.copy(),
+                polling_handler=polling_handler,
+                request=request,
+                fastapi_response=fastapi_response,
+                user_api_key_dict=user_api_key_dict,
+                general_settings=general_settings,
+                llm_router=llm_router,
+                proxy_config=proxy_config,
+                proxy_logging_obj=proxy_logging_obj,
+                select_data_generator=select_data_generator,
+                user_model=user_model,
+                user_temperature=user_temperature,
+                user_request_timeout=user_request_timeout,
+                user_max_tokens=user_max_tokens,
+                user_api_base=user_api_base,
+                version=version,
+            )
+        )
+
+        # Return OpenAI Response object format (initial state)
+        # https://platform.openai.com/docs/api-reference/responses/object
+        return initial_state
+
+    # Normal response flow
     processor = ProxyBaseLLMRequestProcessing(data=data)
     try:
         return await processor.base_process_llm_request(
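The gating call above imports `should_use_polling_for_request` from `litellm.proxy.response_polling.polling_handler`, which is not shown in this diff. A hypothetical reconstruction from its call site, for orientation only; the real implementation may check more or different conditions.

```python
# Hypothetical: inferred purely from the keyword arguments at the call site.
def should_use_polling_for_request(
    background_mode: bool,
    polling_via_cache_enabled: bool,
    redis_cache,
    model: str,
    llm_router,
) -> bool:
    # The three preconditions visible at the call site: the caller asked for
    # background mode, the proxy enabled polling_via_cache, and a Redis cache
    # exists to hold intermediate state.
    if not (background_mode and polling_via_cache_enabled and redis_cache):
        return False
    # The model/llm_router parameters suggest an additional per-deployment
    # check, e.g. that the routed model supports background streaming.
    return llm_router is not None and bool(model)
```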
@@ -253,9 +343,18 @@ async def get_response(
     """
     Get a response by ID.

+    Supports both:
+    - Polling IDs (litellm_poll_*): Returns cumulative cached content from background responses
+    - Provider response IDs: Passes through to the provider API
+
     Follows the OpenAI Responses API spec: https://platform.openai.com/docs/api-reference/responses/get

     ```bash
+    # Get polling response
+    curl -X GET http://localhost:4000/v1/responses/litellm_poll_abc123 \
+    -H "Authorization: Bearer sk-1234"
+
+    # Get provider response
     curl -X GET http://localhost:4000/v1/responses/resp_abc123 \
     -H "Authorization: Bearer sk-1234"
     ```
@@ -266,6 +365,7 @@ async def get_response(
         llm_router,
         proxy_config,
         proxy_logging_obj,
+        redis_usage_cache,
         select_data_generator,
         user_api_base,
         user_max_tokens,
@@ -274,7 +374,33 @@ async def get_response(
         user_temperature,
         version,
     )
-
+    from litellm.proxy.response_polling.polling_handler import ResponsePollingHandler
+
+    # Check if this is a polling ID
+    if ResponsePollingHandler.is_polling_id(response_id):
+        # Handle polling response
+        if not redis_usage_cache:
+            raise HTTPException(
+                status_code=500,
+                detail="Redis cache not configured. Polling requires Redis.",
+            )
+
+        polling_handler = ResponsePollingHandler(redis_cache=redis_usage_cache)
+
+        # Get current state from cache
+        state = await polling_handler.get_state(response_id)
+
+        if not state:
+            raise HTTPException(
+                status_code=404,
+                detail=f"Polling response {response_id} not found or expired",
+            )
+
+        # Return the whole state directly (OpenAI Response object format)
+        # https://platform.openai.com/docs/api-reference/responses/object
+        return state
+
+    # Normal provider response flow
     data = await _read_request_body(request=request)
     data["response_id"] = response_id
     processor = ProxyBaseLLMRequestProcessing(data=data)
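`is_polling_id` is also not shown in this diff. Given the `litellm_poll_*` prefix documented in the docstrings, it is presumably a simple prefix check along these lines (an assumption, not the confirmed implementation):

```python
# Hypothetical sketch of the ID check used to route polling requests.
POLLING_ID_PREFIX = "litellm_poll_"

def is_polling_id(response_id: str) -> bool:
    # Polling IDs are minted by generate_polling_id() and carry a fixed
    # prefix, so they never collide with provider IDs like "resp_abc123".
    return response_id.startswith(POLLING_ID_PREFIX)
```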
@@ -330,6 +456,10 @@ async def delete_response(
     """
     Delete a response by ID.

+    Supports both:
+    - Polling IDs (litellm_poll_*): Deletes from Redis cache
+    - Provider response IDs: Passes through to the provider API
+
     Follows the OpenAI Responses API spec: https://platform.openai.com/docs/api-reference/responses/delete

     ```bash
@@ -343,6 +473,7 @@ async def delete_response(
         llm_router,
         proxy_config,
         proxy_logging_obj,
+        redis_usage_cache,
         select_data_generator,
         user_api_base,
         user_max_tokens,
@@ -351,7 +482,44 @@ async def delete_response(
         user_temperature,
         version,
     )
-
+    from litellm.proxy.response_polling.polling_handler import ResponsePollingHandler
+
+    # Check if this is a polling ID
+    if ResponsePollingHandler.is_polling_id(response_id):
+        # Handle polling response deletion
+        if not redis_usage_cache:
+            raise HTTPException(
+                status_code=500,
+                detail="Redis cache not configured.",
+            )
+
+        polling_handler = ResponsePollingHandler(redis_cache=redis_usage_cache)
+
+        # Get state to verify access
+        state = await polling_handler.get_state(response_id)
+
+        if not state:
+            raise HTTPException(
+                status_code=404,
+                detail=f"Polling response {response_id} not found",
+            )
+
+        # Delete from cache
+        success = await polling_handler.delete_polling(response_id)
+
+        if success:
+            return DeleteResponseResult(
+                id=response_id,
+                object="response",
+                deleted=True,
+            )
+        else:
+            raise HTTPException(
+                status_code=500,
+                detail="Failed to delete polling response",
+            )
+
+    # Normal provider response flow
     data = await _read_request_body(request=request)
     data["response_id"] = response_id
     processor = ProxyBaseLLMRequestProcessing(data=data)
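Assuming `DeleteResponseResult` serializes to exactly the fields constructed above (it may include more), a client deleting a polling response should observe something like this; the ID is illustrative:

```python
import requests

resp = requests.delete(
    "http://localhost:4000/v1/responses/litellm_poll_abc123",
    headers={"Authorization": "Bearer sk-1234"},
)
body = resp.json()
# Expected shape, per the DeleteResponseResult constructed in the diff:
# {"id": "litellm_poll_abc123", "object": "response", "deleted": true}
assert body["deleted"] is True and body["object"] == "response"
```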
@@ -475,9 +643,18 @@ async def cancel_response(
     """
     Cancel a response by ID.

+    Supports both:
+    - Polling IDs (litellm_poll_*): Cancels the background response and updates status in Redis
+    - Provider response IDs: Passes through to the provider API
+
     Follows the OpenAI Responses API spec: https://platform.openai.com/docs/api-reference/responses/cancel

     ```bash
+    # Cancel polling response
+    curl -X POST http://localhost:4000/v1/responses/litellm_poll_abc123/cancel \
+    -H "Authorization: Bearer sk-1234"
+
+    # Cancel provider response
     curl -X POST http://localhost:4000/v1/responses/resp_abc123/cancel \
     -H "Authorization: Bearer sk-1234"
     ```
@@ -488,6 +665,7 @@ async def cancel_response(
         llm_router,
         proxy_config,
         proxy_logging_obj,
+        redis_usage_cache,
         select_data_generator,
         user_api_base,
         user_max_tokens,
@@ -496,7 +674,44 @@ async def cancel_response(
         user_temperature,
         version,
     )
-
+    from litellm.proxy.response_polling.polling_handler import ResponsePollingHandler
+
+    # Check if this is a polling ID
+    if ResponsePollingHandler.is_polling_id(response_id):
+        # Handle polling response cancellation
+        if not redis_usage_cache:
+            raise HTTPException(
+                status_code=500,
+                detail="Redis cache not configured.",
+            )
+
+        polling_handler = ResponsePollingHandler(redis_cache=redis_usage_cache)
+
+        # Get current state to verify it exists
+        state = await polling_handler.get_state(response_id)
+
+        if not state:
+            raise HTTPException(
+                status_code=404,
+                detail=f"Polling response {response_id} not found",
+            )
+
+        # Cancel the polling response (sets status to "cancelled")
+        success = await polling_handler.cancel_polling(response_id)
+
+        if success:
+            # Fetch the updated state with cancelled status
+            updated_state = await polling_handler.get_state(response_id)
+
+            # Return the whole state directly (now with status="cancelled")
+            return updated_state
+        else:
+            raise HTTPException(
+                status_code=500,
+                detail="Failed to cancel polling response",
+            )
+
+    # Normal provider response flow
     data = await _read_request_body(request=request)
     data["response_id"] = response_id
     processor = ProxyBaseLLMRequestProcessing(data=data)
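Neither `cancel_polling` nor the cancellation check inside `background_streaming_task` appears in this diff. One plausible way they could cooperate, sketched with an in-memory store standing in for Redis; the method names mirror the diff, but the bodies and the `_save_state` helper are assumptions:

```python
# Hypothetical: cancel flips the cached status, and the background streaming
# loop checks it between chunks and stops when it sees "cancelled".
class ResponsePollingHandlerSketch:
    def __init__(self, store: dict):
        self._store = store  # stand-in for the Redis-backed state

    async def get_state(self, polling_id: str):
        return self._store.get(polling_id)

    async def _save_state(self, polling_id: str, state: dict) -> None:
        self._store[polling_id] = state  # hypothetical persistence helper

    async def cancel_polling(self, polling_id: str) -> bool:
        state = await self.get_state(polling_id)
        if not state:
            return False
        state["status"] = "cancelled"
        await self._save_state(polling_id, state)
        return True

# Inside the background streaming loop, between chunks (the cooperative
# check the endpoint's behavior implies):
#     state = await polling_handler.get_state(polling_id)
#     if state and state.get("status") == "cancelled":
#         return  # stop streaming; the cache already reflects cancellation
```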