From 6b3c7d562673318533942b2f3633f4373689d140 Mon Sep 17 00:00:00 2001 From: youngsu5582 <98307410+youngsu5582@users.noreply.github.com> Date: Wed, 6 May 2026 21:13:20 +0900 Subject: [PATCH 01/17] =?UTF-8?q?feat:=20DTO=20=EB=B0=8F=20Controller=20?= =?UTF-8?q?=EB=AA=85=EC=84=B8?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - DTO 및 Controller 선언 - 서비스 로직은 TODO --- .../FrequentlyQuestionChatApiController.java | 27 +++++++++++++++++++ .../com/cholog/bootcamp/data/TokenUsage.java | 8 ++++++ .../dto/FrequentlyQuestionChatRequestDto.java | 6 +++++ .../FrequentlyQuestionChatResponseDto.java | 6 +++++ 4 files changed, 47 insertions(+) create mode 100644 src/main/java/com/cholog/bootcamp/controller/FrequentlyQuestionChatApiController.java create mode 100644 src/main/java/com/cholog/bootcamp/data/TokenUsage.java create mode 100644 src/main/java/com/cholog/bootcamp/dto/FrequentlyQuestionChatRequestDto.java create mode 100644 src/main/java/com/cholog/bootcamp/dto/FrequentlyQuestionChatResponseDto.java diff --git a/src/main/java/com/cholog/bootcamp/controller/FrequentlyQuestionChatApiController.java b/src/main/java/com/cholog/bootcamp/controller/FrequentlyQuestionChatApiController.java new file mode 100644 index 0000000..6b1a362 --- /dev/null +++ b/src/main/java/com/cholog/bootcamp/controller/FrequentlyQuestionChatApiController.java @@ -0,0 +1,27 @@ +package com.cholog.bootcamp.controller; + +import com.cholog.bootcamp.dto.FrequentlyQuestionChatRequestDto; +import com.cholog.bootcamp.dto.FrequentlyQuestionChatResponseDto; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.springframework.http.ResponseEntity; +import org.springframework.web.bind.annotation.PostMapping; +import org.springframework.web.bind.annotation.RequestBody; +import org.springframework.web.bind.annotation.RequestMapping; +import org.springframework.web.bind.annotation.RestController; + +@RestController +@RequestMapping("/api/chat") +public 
class FrequentlyQuestionChatApiController { + + private static final Logger log = LoggerFactory.getLogger(FrequentlyQuestionChatApiController.class); + + @PostMapping + public ResponseEntity question( + @RequestBody FrequentlyQuestionChatRequestDto dto + ) { + log.info("FAQ 요청이 들어왔습니다. {}", dto.question()); + // TODO 서비스 레이어 및 응답 구현 + return ResponseEntity.ok(null); + } +} diff --git a/src/main/java/com/cholog/bootcamp/data/TokenUsage.java b/src/main/java/com/cholog/bootcamp/data/TokenUsage.java new file mode 100644 index 0000000..4747ef8 --- /dev/null +++ b/src/main/java/com/cholog/bootcamp/data/TokenUsage.java @@ -0,0 +1,8 @@ +package com.cholog.bootcamp.data; + +public record TokenUsage( + int promptTokens, + int completionTokens, + int totalTokens +) { +} \ No newline at end of file diff --git a/src/main/java/com/cholog/bootcamp/dto/FrequentlyQuestionChatRequestDto.java b/src/main/java/com/cholog/bootcamp/dto/FrequentlyQuestionChatRequestDto.java new file mode 100644 index 0000000..c5c9bde --- /dev/null +++ b/src/main/java/com/cholog/bootcamp/dto/FrequentlyQuestionChatRequestDto.java @@ -0,0 +1,6 @@ +package com.cholog.bootcamp.dto; + +public record FrequentlyQuestionChatRequestDto ( + String question +){ +} diff --git a/src/main/java/com/cholog/bootcamp/dto/FrequentlyQuestionChatResponseDto.java b/src/main/java/com/cholog/bootcamp/dto/FrequentlyQuestionChatResponseDto.java new file mode 100644 index 0000000..a4f2d89 --- /dev/null +++ b/src/main/java/com/cholog/bootcamp/dto/FrequentlyQuestionChatResponseDto.java @@ -0,0 +1,6 @@ +package com.cholog.bootcamp.dto; + +import com.cholog.bootcamp.data.TokenUsage; + +public record FrequentlyQuestionChatResponseDto(String answer, TokenUsage usage) { +} From b49fbb296f433cdc4542e39ba06d80adffdf4bae Mon Sep 17 00:00:00 2001 From: youngsu5582 <98307410+youngsu5582@users.noreply.github.com> Date: Sat, 9 May 2026 16:33:13 +0900 Subject: [PATCH 02/17] =?UTF-8?q?feat:=20gitignore=20=EC=B6=94=EA=B0=80=20?= 
=?UTF-8?q?=EB=B0=8F=20lombok=20=EC=9D=98=EC=A1=B4=EC=84=B1=20=EC=B6=94?= =?UTF-8?q?=EA=B0=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 개발 편의성 위한 lombok 추가 - local profile 용 gitignore 추가 --- .gitignore | 2 ++ build.gradle | 3 +++ 2 files changed, 5 insertions(+) diff --git a/.gitignore b/.gitignore index 23c550a..1c4fe0c 100644 --- a/.gitignore +++ b/.gitignore @@ -40,3 +40,5 @@ __pycache__/ # Matrix metadata **/.omc/ + +src/main/resources/application-*.yml \ No newline at end of file diff --git a/build.gradle b/build.gradle index 941e596..71883d3 100644 --- a/build.gradle +++ b/build.gradle @@ -26,8 +26,11 @@ dependencyManagement { dependencies { implementation 'org.springframework.boot:spring-boot-starter-web' implementation 'org.springframework.ai:spring-ai-starter-model-openai' + compileOnly 'org.projectlombok:lombok' testImplementation 'org.springframework.boot:spring-boot-starter-test' + + annotationProcessor 'org.projectlombok:lombok' } tasks.named('test') { From 8b092fa77ba4c93ec57477715a549d1ab277a487 Mon Sep 17 00:00:00 2001 From: youngsu5582 <98307410+youngsu5582@users.noreply.github.com> Date: Sat, 9 May 2026 16:34:46 +0900 Subject: [PATCH 03/17] =?UTF-8?q?feat:=20chat=20=EC=84=9C=EB=B9=84?= =?UTF-8?q?=EC=8A=A4=20=EB=A1=9C=EC=A7=81=20=EA=B5=AC=ED=98=84?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - spring ai 통한 기능 구현 --- .../bootcamp/config/ChatClientConfig.java | 16 +++++++ .../FrequentlyQuestionChatApiController.java | 8 +++- .../com/cholog/bootcamp/data/TokenUsage.java | 12 ++++++ .../FrequentlyQuestionChatApiService.java | 43 +++++++++++++++++++ 4 files changed, 78 insertions(+), 1 deletion(-) create mode 100644 src/main/java/com/cholog/bootcamp/config/ChatClientConfig.java create mode 100644 src/main/java/com/cholog/bootcamp/service/FrequentlyQuestionChatApiService.java diff --git a/src/main/java/com/cholog/bootcamp/config/ChatClientConfig.java 
b/src/main/java/com/cholog/bootcamp/config/ChatClientConfig.java new file mode 100644 index 0000000..b3a4ce2 --- /dev/null +++ b/src/main/java/com/cholog/bootcamp/config/ChatClientConfig.java @@ -0,0 +1,16 @@ +package com.cholog.bootcamp.config; + +import lombok.NoArgsConstructor; +import org.springframework.ai.chat.client.ChatClient; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; + +@Configuration +@NoArgsConstructor +public class ChatClientConfig { + + @Bean + public ChatClient chatClient(ChatClient.Builder builder) { + return builder.build(); + } +} diff --git a/src/main/java/com/cholog/bootcamp/controller/FrequentlyQuestionChatApiController.java b/src/main/java/com/cholog/bootcamp/controller/FrequentlyQuestionChatApiController.java index 6b1a362..a3ab541 100644 --- a/src/main/java/com/cholog/bootcamp/controller/FrequentlyQuestionChatApiController.java +++ b/src/main/java/com/cholog/bootcamp/controller/FrequentlyQuestionChatApiController.java @@ -2,6 +2,8 @@ import com.cholog.bootcamp.dto.FrequentlyQuestionChatRequestDto; import com.cholog.bootcamp.dto.FrequentlyQuestionChatResponseDto; +import com.cholog.bootcamp.service.FrequentlyQuestionChatApiService; +import lombok.RequiredArgsConstructor; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.http.ResponseEntity; @@ -12,16 +14,20 @@ @RestController @RequestMapping("/api/chat") +@RequiredArgsConstructor public class FrequentlyQuestionChatApiController { private static final Logger log = LoggerFactory.getLogger(FrequentlyQuestionChatApiController.class); + private final FrequentlyQuestionChatApiService frequentlyQuestionChatApiService; + @PostMapping public ResponseEntity question( @RequestBody FrequentlyQuestionChatRequestDto dto ) { log.info("FAQ 요청이 들어왔습니다. 
{}", dto.question()); // TODO 서비스 레이어 및 응답 구현 - return ResponseEntity.ok(null); + var response = frequentlyQuestionChatApiService.chat(dto); + return ResponseEntity.ok(response); } } diff --git a/src/main/java/com/cholog/bootcamp/data/TokenUsage.java b/src/main/java/com/cholog/bootcamp/data/TokenUsage.java index 4747ef8..ee7d271 100644 --- a/src/main/java/com/cholog/bootcamp/data/TokenUsage.java +++ b/src/main/java/com/cholog/bootcamp/data/TokenUsage.java @@ -1,8 +1,20 @@ package com.cholog.bootcamp.data; +import org.springframework.ai.chat.metadata.Usage; + public record TokenUsage( int promptTokens, int completionTokens, int totalTokens ) { + + public static TokenUsage EMPTY = new TokenUsage(0, 0, 0); + + public static TokenUsage from(Usage usage) { + return new TokenUsage( + usage.getPromptTokens(), + usage.getCompletionTokens(), + usage.getTotalTokens() + ); + } } \ No newline at end of file diff --git a/src/main/java/com/cholog/bootcamp/service/FrequentlyQuestionChatApiService.java b/src/main/java/com/cholog/bootcamp/service/FrequentlyQuestionChatApiService.java new file mode 100644 index 0000000..95fbb8b --- /dev/null +++ b/src/main/java/com/cholog/bootcamp/service/FrequentlyQuestionChatApiService.java @@ -0,0 +1,43 @@ +package com.cholog.bootcamp.service; + +import com.cholog.bootcamp.data.TokenUsage; +import com.cholog.bootcamp.dto.FrequentlyQuestionChatRequestDto; +import com.cholog.bootcamp.dto.FrequentlyQuestionChatResponseDto; +import jakarta.annotation.PostConstruct; +import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; +import org.springframework.ai.chat.client.ChatClient; +import org.springframework.ai.chat.metadata.Usage; +import org.springframework.ai.chat.prompt.Prompt; +import org.springframework.stereotype.Service; + +@Slf4j +@Service +@RequiredArgsConstructor +public class FrequentlyQuestionChatApiService { + + private final ChatClient chatClient; + + public FrequentlyQuestionChatResponseDto 
chat(FrequentlyQuestionChatRequestDto requestDto) { + var prompt = Prompt.builder() + .content(requestDto.question()) + .build(); + + var response = chatClient.prompt(prompt) + .call() + .chatResponse(); + + if (response == null) { + return new FrequentlyQuestionChatResponseDto( + "응답이 없습니다.", TokenUsage.EMPTY + ); + } + + var generation = response.getResult().getOutput(); + var metadata = response.getMetadata(); + Usage usage = metadata.getUsage(); + + log.info("[{}] 결과: {}, 토큰 사용량: {}", metadata.getModel(), generation.getText(), usage); + return new FrequentlyQuestionChatResponseDto(generation.getText(), TokenUsage.from(usage)); + } +} From 77010104e0cfe0021576c14c106a2808940a5db1 Mon Sep 17 00:00:00 2001 From: youngsu5582 <98307410+youngsu5582@users.noreply.github.com> Date: Sun, 10 May 2026 00:38:24 +0900 Subject: [PATCH 04/17] =?UTF-8?q?chore:=20=ED=85=8C=EC=8A=A4=ED=8A=B8=20?= =?UTF-8?q?=EC=8A=A4=ED=81=AC=EB=A6=BD=ED=8A=B8=20=EB=B3=91=EB=A0=AC=20?= =?UTF-8?q?=EA=B0=80=EB=8A=A5=ED=95=98=EA=B2=8C=20=EB=B3=80=EA=B2=BD,=20?= =?UTF-8?q?=EB=AC=B8=EC=84=9C=EB=82=B4=20=EC=9E=98=EB=AA=BB=EB=90=9C=20?= =?UTF-8?q?=EB=B6=80=EB=B6=84=20=EC=88=98=EC=A0=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - requirements 파일 추가 --- README.md | 7 +- data/evaluate.py | 163 +++++++++++++++++++++++++++++++----------- data/requirements.txt | 3 + 3 files changed, 131 insertions(+), 42 deletions(-) create mode 100644 data/requirements.txt diff --git a/README.md b/README.md index 7a0f14b..d88d313 100644 --- a/README.md +++ b/README.md @@ -45,10 +45,13 @@ cd data # Python 환경 준비 python -m venv .venv -.venv/bin/pip install openai qdrant-client python-dotenv +.venv/bin/pip install -r requirements.txt -# 평가 실행 (judge 모델 gpt-4o-mini 사용, 100문항 기준 약 $0.5~1 추가 비용) +# 평가 실행 (judge 모델 gpt-4o-mini 사용, 150문항 기준 약 $1~1.5 추가 비용) .venv/bin/python evaluate.py + +# 빠르게 돌리려면 병렬 워커 활성화 (default: 1) +.venv/bin/python evaluate.py --parallel 10 ``` --- diff --git 
a/data/evaluate.py b/data/evaluate.py index ed941cd..ee02239 100644 --- a/data/evaluate.py +++ b/data/evaluate.py @@ -11,8 +11,9 @@ 실행: # 서버가 localhost:8080에서 실행 중이어야 합니다 .venv/bin/python evaluate.py - .venv/bin/python evaluate.py --verbose # 질문별 상세 출력 - .venv/bin/python evaluate.py --limit 10 # 처음 10개만 평가 + .venv/bin/python evaluate.py --verbose # 질문별 상세 출력 + .venv/bin/python evaluate.py --limit 10 # 처음 10개만 평가 + .venv/bin/python evaluate.py --parallel 10 # 병렬 워커 10개로 가속 비용: judge 모델(gpt-4o-mini) 사용, 100문항 기준 약 $0.3~0.5 @@ -22,6 +23,7 @@ import os import argparse import time +from concurrent.futures import ThreadPoolExecutor, as_completed from pathlib import Path import requests @@ -102,12 +104,43 @@ def judge_answer(question: str, expected: str, actual: str) -> dict: return {"score": 0, "reason": "판정 파싱 실패"} +# ─── 워커 ───────────────────────────────────────────────────────────────────── + +def process_question(q: dict, idx: int) -> dict: + """질문 1건을 처리해 결과 dict를 반환합니다. (스레드 안전)""" + start = time.time() + qid = q.get("id", f"Q{idx+1}") + question_ko = q["question_ko"] + expected = q["expected_answer"] + tier = q.get("tier", "unknown") + + response = ask_server(question_ko) + if response is None: + return {"qid": qid, "tier": tier, "status": "error", "question": question_ko, + "duration": time.time() - start} + + actual_answer = response.get("answer", "") + judgment = judge_answer(question_ko, expected, actual_answer) + score = judgment.get("score", 0) + + return { + "qid": qid, + "tier": tier, + "status": "ok", + "score": score, + "reason": judgment.get("reason", ""), + "question": question_ko, + "duration": time.time() - start, + } + + # ─── 메인 ───────────────────────────────────────────────────────────────────── def main(): parser = argparse.ArgumentParser(description="챗봇 품질 평가") parser.add_argument("--verbose", action="store_true", help="질문별 상세 출력") parser.add_argument("--limit", type=int, default=0, help="평가할 질문 수 제한 (0=전체)") + 
parser.add_argument("--parallel", type=int, default=1, help="병렬 워커 수 (default: 1, 순차 실행)") args = parser.parse_args() # 테스트 질문 로드 @@ -122,6 +155,8 @@ def main(): print(f"서버: {SERVER_URL}") print(f"질문 수: {len(questions)}") print(f"판정 모델: {JUDGE_MODEL}") + if args.parallel > 1: + print(f"병렬 워커: {args.parallel}") print() # 서버 연결 확인 @@ -133,48 +168,93 @@ def main(): results = {"correct": 0, "incorrect": 0, "error": 0} tier_results = {} + durations = [] start_time = time.time() - for i, q in enumerate(questions): - qid = q.get("id", f"Q{i+1}") - question_ko = q["question_ko"] - expected = q["expected_answer"] - tier = q.get("tier", "unknown") + if args.parallel > 1: + # ─── 병렬 실행 (워커는 결과만 반환, 집계는 메인 스레드에서) ─── + completed = 0 + with ThreadPoolExecutor(max_workers=args.parallel) as executor: + futures = [executor.submit(process_question, q, i) for i, q in enumerate(questions)] + + for fut in as_completed(futures): + r = fut.result() + durations.append(r["duration"]) + completed += 1 + tier = r["tier"] + + if tier not in tier_results: + tier_results[tier] = {"correct": 0, "total": 0} + tier_results[tier]["total"] += 1 + + if r["status"] == "error": + results["error"] += 1 + if args.verbose: + print(f"[{r['qid']}] ERROR — 서버 응답 없음") + else: + score = r["score"] + if score == 1: + results["correct"] += 1 + tier_results[tier]["correct"] += 1 + marker = "✓" + else: + results["incorrect"] += 1 + marker = "✗" + + if args.verbose: + print(f"[{r['qid']}] {marker} ({tier}) {r['question'][:40]}...") + if score == 0: + print(f" 이유: {r['reason'][:80]}") + + # 진행률 (10개마다) + if not args.verbose and completed % 10 == 0: + print(f" 진행: {completed}/{len(questions)}") + else: + # ─── 순차 실행 (기본) ─── + for i, q in enumerate(questions): + q_start = time.time() + qid = q.get("id", f"Q{i+1}") + question_ko = q["question_ko"] + expected = q["expected_answer"] + tier = q.get("tier", "unknown") + + if tier not in tier_results: + tier_results[tier] = {"correct": 0, "total": 0} + 
tier_results[tier]["total"] += 1 + + # 서버에 질문 + response = ask_server(question_ko) + if response is None: + results["error"] += 1 + durations.append(time.time() - q_start) + if args.verbose: + print(f"[{qid}] ERROR — 서버 응답 없음") + continue + + actual_answer = response.get("answer", "") + + # LLM 판정 + judgment = judge_answer(question_ko, expected, actual_answer) + score = judgment.get("score", 0) + + if score == 1: + results["correct"] += 1 + tier_results[tier]["correct"] += 1 + marker = "✓" + else: + results["incorrect"] += 1 + marker = "✗" - if tier not in tier_results: - tier_results[tier] = {"correct": 0, "total": 0} - tier_results[tier]["total"] += 1 - - # 서버에 질문 - response = ask_server(question_ko) - if response is None: - results["error"] += 1 if args.verbose: - print(f"[{qid}] ERROR — 서버 응답 없음") - continue - - actual_answer = response.get("answer", "") - - # LLM 판정 - judgment = judge_answer(question_ko, expected, actual_answer) - score = judgment.get("score", 0) - - if score == 1: - results["correct"] += 1 - tier_results[tier]["correct"] += 1 - marker = "✓" - else: - results["incorrect"] += 1 - marker = "✗" + print(f"[{qid}] {marker} ({tier}) {question_ko[:40]}...") + if score == 0: + print(f" 이유: {judgment.get('reason', '')[:80]}") - if args.verbose: - print(f"[{qid}] {marker} ({tier}) {question_ko[:40]}...") - if score == 0: - print(f" 이유: {judgment.get('reason', '')[:80]}") + durations.append(time.time() - q_start) - # 진행률 (10개마다) - if not args.verbose and (i + 1) % 10 == 0: - print(f" 진행: {i+1}/{len(questions)}") + # 진행률 (10개마다) + if not args.verbose and (i + 1) % 10 == 0: + print(f" 진행: {i+1}/{len(questions)}") # 결과 출력 elapsed = time.time() - start_time @@ -194,8 +274,10 @@ def main(): if results["error"] > 0: print(f"\n 에러: {results['error']}건") - print(f"\n소요 시간: {elapsed:.1f}초") - print(f"평균 응답: {elapsed/max(total,1):.1f}초/질문") + print(f"\n벽시계 시간: {elapsed:.1f}초") + if durations: + avg_response = sum(durations) / len(durations) + print(f"평균 응답: 
{avg_response:.1f}초/질문") # 결과 저장 result_file = DATA_DIR / "eval_result.json" @@ -208,6 +290,7 @@ def main(): "accuracy": results["correct"] / max(total, 1), "tier_results": tier_results, "elapsed_seconds": elapsed, + "avg_response_seconds": (sum(durations) / len(durations)) if durations else 0, }, f, indent=2, ensure_ascii=False) print(f"\n결과 저장: {result_file}") diff --git a/data/requirements.txt b/data/requirements.txt new file mode 100644 index 0000000..521ddde --- /dev/null +++ b/data/requirements.txt @@ -0,0 +1,3 @@ +requests +openai +python-dotenv From 41fe308f75ab83f222eb1d341cb8c9b0530632d7 Mon Sep 17 00:00:00 2001 From: youngsu5582 <98307410+youngsu5582@users.noreply.github.com> Date: Sun, 10 May 2026 14:01:24 +0900 Subject: [PATCH 05/17] =?UTF-8?q?feat:=20=EA=B8=B0=EB=8A=A5=20=EB=B9=84?= =?UTF-8?q?=EC=9A=A9=EA=B3=84=EC=82=B0=20=EB=A1=9C=EA=B7=B8=20=EC=B6=94?= =?UTF-8?q?=EA=B0=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - litellm github 기반 비용 JSON 로드해서 사용 - spring ai 표준 기반 처리(캐시 비용 처리X) --- .../com/cholog/bootcamp/data/ModelPrice.java | 22 + .../com/cholog/bootcamp/data/TokenUsage.java | 2 + .../FrequentlyQuestionChatApiService.java | 22 +- .../bootcamp/service/PricingCalculator.java | 77 + src/main/resources/model_prices.json | 40041 ++++++++++++++++ 5 files changed, 40159 insertions(+), 5 deletions(-) create mode 100644 src/main/java/com/cholog/bootcamp/data/ModelPrice.java create mode 100644 src/main/java/com/cholog/bootcamp/service/PricingCalculator.java create mode 100644 src/main/resources/model_prices.json diff --git a/src/main/java/com/cholog/bootcamp/data/ModelPrice.java b/src/main/java/com/cholog/bootcamp/data/ModelPrice.java new file mode 100644 index 0000000..59a807f --- /dev/null +++ b/src/main/java/com/cholog/bootcamp/data/ModelPrice.java @@ -0,0 +1,22 @@ +package com.cholog.bootcamp.data; + +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; +import 
com.fasterxml.jackson.annotation.JsonProperty; +import org.springframework.ai.chat.metadata.Usage; +import org.springframework.lang.Nullable; + +import java.math.BigDecimal; + +@JsonIgnoreProperties(ignoreUnknown = true) +public record ModelPrice( + @JsonProperty("input_cost_per_token") BigDecimal inputCostPerToken, + @JsonProperty("output_cost_per_token") BigDecimal outputCostPerToken, + @Nullable @JsonProperty("cache_read_input_token_cost") BigDecimal cacheReadCost, + @Nullable @JsonProperty("cache_creation_input_token_cost") BigDecimal cacheWriteCost, + @JsonProperty("supports_prompt_caching") Boolean supportsPromptCaching +) { + public BigDecimal calculate(TokenUsage usage) { + return inputCostPerToken.multiply(BigDecimal.valueOf(usage.promptTokens())) + .add(outputCostPerToken.multiply(BigDecimal.valueOf(usage.completionTokens()))); + } +} \ No newline at end of file diff --git a/src/main/java/com/cholog/bootcamp/data/TokenUsage.java b/src/main/java/com/cholog/bootcamp/data/TokenUsage.java index ee7d271..adbfecf 100644 --- a/src/main/java/com/cholog/bootcamp/data/TokenUsage.java +++ b/src/main/java/com/cholog/bootcamp/data/TokenUsage.java @@ -1,7 +1,9 @@ package com.cholog.bootcamp.data; +import com.fasterxml.jackson.annotation.JsonInclude; import org.springframework.ai.chat.metadata.Usage; +@JsonInclude(JsonInclude.Include.NON_NULL) public record TokenUsage( int promptTokens, int completionTokens, diff --git a/src/main/java/com/cholog/bootcamp/service/FrequentlyQuestionChatApiService.java b/src/main/java/com/cholog/bootcamp/service/FrequentlyQuestionChatApiService.java index 95fbb8b..85acae4 100644 --- a/src/main/java/com/cholog/bootcamp/service/FrequentlyQuestionChatApiService.java +++ b/src/main/java/com/cholog/bootcamp/service/FrequentlyQuestionChatApiService.java @@ -3,20 +3,21 @@ import com.cholog.bootcamp.data.TokenUsage; import com.cholog.bootcamp.dto.FrequentlyQuestionChatRequestDto; import com.cholog.bootcamp.dto.FrequentlyQuestionChatResponseDto; 
-import jakarta.annotation.PostConstruct; import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; import org.springframework.ai.chat.client.ChatClient; -import org.springframework.ai.chat.metadata.Usage; import org.springframework.ai.chat.prompt.Prompt; import org.springframework.stereotype.Service; +import java.math.BigDecimal; + @Slf4j @Service @RequiredArgsConstructor public class FrequentlyQuestionChatApiService { private final ChatClient chatClient; + private final PricingCalculator pricingCalculator; public FrequentlyQuestionChatResponseDto chat(FrequentlyQuestionChatRequestDto requestDto) { var prompt = Prompt.builder() @@ -35,9 +36,20 @@ public FrequentlyQuestionChatResponseDto chat(FrequentlyQuestionChatRequestDto r var generation = response.getResult().getOutput(); var metadata = response.getMetadata(); - Usage usage = metadata.getUsage(); - log.info("[{}] 결과: {}, 토큰 사용량: {}", metadata.getModel(), generation.getText(), usage); - return new FrequentlyQuestionChatResponseDto(generation.getText(), TokenUsage.from(usage)); + var usage = TokenUsage.from(metadata.getUsage()); + var price = calculateModelPrice(metadata.getModel(), usage); + + log.info("[{}] 토큰 사용량: {}, 토큰 비용: {}$\n결과: {}", metadata.getModel(), usage, price, generation.getText()); + return new FrequentlyQuestionChatResponseDto(generation.getText(), usage); + } + + private BigDecimal calculateModelPrice(String model, TokenUsage usage) { + try { + return pricingCalculator.calculatePrice(model, usage); + } catch (Exception e) { + log.info("토큰 비용 계산에 실패했습니다. 
모델: {}, 메시지: {}", model, e.getMessage(), e); + return BigDecimal.ZERO; + } } } diff --git a/src/main/java/com/cholog/bootcamp/service/PricingCalculator.java b/src/main/java/com/cholog/bootcamp/service/PricingCalculator.java new file mode 100644 index 0000000..7cadbb4 --- /dev/null +++ b/src/main/java/com/cholog/bootcamp/service/PricingCalculator.java @@ -0,0 +1,77 @@ +package com.cholog.bootcamp.service; + +import com.cholog.bootcamp.data.ModelPrice; +import com.cholog.bootcamp.data.TokenUsage; +import com.fasterxml.jackson.databind.ObjectMapper; +import jakarta.annotation.PostConstruct; +import lombok.extern.slf4j.Slf4j; +import org.springframework.core.io.ClassPathResource; +import org.springframework.stereotype.Service; + +import java.math.BigDecimal; +import java.util.Map; +import java.util.Optional; + +/** + * 모델별 토큰 사용량 기반으로 API 호출 비용을 계산하는 서비스. + * + *

<p>가격 정보는 클래스패스의 {@code model_prices.json} 파일에서 로드된다. + * 이 파일은 BerriAI/litellm 저장소의 가격 데이터를 그대로 사용하며, + * 빌드 전 아래 명령으로 수동 갱신해야 한다. + * + * <pre>{@code
+ * curl -o src/main/resources/model_prices.json \
+ *   https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json
+ * }</pre>
+ * + * <p>출처: + * <a href="https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json"> + * model_prices_and_context_window.json (BerriAI/litellm)</a> + * + * <p>
{@link PostConstruct} 시점에 한 번 로드되어 메모리에 캐시되며, + * 런타임 중에는 외부 네트워크 호출이 발생하지 않는다. + */ +@Slf4j +@Service +public class PricingCalculator { + + private final ObjectMapper mapper = new ObjectMapper(); + private Map prices; + + private static final String CLASSPATH_FILE = "model_prices.json"; + + @PostConstruct + public void init() throws Exception { + var resource = new ClassPathResource(CLASSPATH_FILE); + try (var is = resource.getInputStream()) { + prices = mapper.readValue(is, mapper.getTypeFactory() + .constructMapType(Map.class, String.class, ModelPrice.class)); + } + log.info("모델 JSON 을 로딩완료. 개수:{}", prices.size()); + } + + /** + * 지정한 모델의 토큰 사용량으로 비용을 계산한다. + * + * @param model 모델명 (예: {@code "gpt-4o-mini"}) + * @param usage 입출력 토큰 사용량 + * @return 계산된 비용 (USD) + * @throws IllegalArgumentException {@code model}이 가격 데이터에 없을 때 + */ + public BigDecimal calculatePrice(String model, TokenUsage usage) { + var modelPrice = find(model); + return modelPrice.calculate(usage); + } + + /** + * 모델명으로 가격 정보를 조회한다. + * + * @param model 모델명 + * @return 해당 모델의 가격 정보 + * @throws IllegalArgumentException {@code model}이 가격 데이터에 없을 때 + */ + public ModelPrice find(String model) { + return Optional.ofNullable(prices.get(model)) + .orElseThrow(() -> new IllegalArgumentException("데이터에 해당 모델이 없습니다. 
모델명: %s".formatted(model))); + } +} diff --git a/src/main/resources/model_prices.json b/src/main/resources/model_prices.json new file mode 100644 index 0000000..e66ad8e --- /dev/null +++ b/src/main/resources/model_prices.json @@ -0,0 +1,40041 @@ +{ + "sample_spec": { + "code_interpreter_cost_per_session": 0.0, + "computer_use_input_cost_per_1k_tokens": 0.0, + "computer_use_output_cost_per_1k_tokens": 0.0, + "deprecation_date": "date when the model becomes deprecated in the format YYYY-MM-DD", + "file_search_cost_per_1k_calls": 0.0, + "file_search_cost_per_gb_per_day": 0.0, + "input_cost_per_audio_token": 0.0, + "input_cost_per_token": 0.0, + "litellm_provider": "one of https://docs.litellm.ai/docs/providers", + "max_input_tokens": "max input tokens, if the provider specifies it. if not default to max_tokens", + "max_output_tokens": "max output tokens, if the provider specifies it. if not default to max_tokens", + "max_tokens": "LEGACY parameter. set to max_output_tokens if provider specifies it. 
IF not set to max_input_tokens, if provider specifies it.", + "mode": "one of: chat, embedding, completion, image_generation, audio_transcription, audio_speech, image_generation, moderation, rerank, search", + "output_cost_per_reasoning_token": 0.0, + "output_cost_per_token": 0.0, + "search_context_cost_per_query": { + "search_context_size_high": 0.0, + "search_context_size_low": 0.0, + "search_context_size_medium": 0.0 + }, + "supported_regions": [ + "global", + "us-west-2", + "eu-west-1", + "ap-southeast-1", + "ap-northeast-1" + ], + "supports_audio_input": true, + "supports_audio_output": true, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_vision": true, + "supports_web_search": true, + "vector_store_cost_per_gb_per_day": 0.0 + }, + "1024-x-1024/50-steps/bedrock/amazon.nova-canvas-v1:0": { + "litellm_provider": "bedrock", + "max_input_tokens": 2600, + "mode": "image_generation", + "output_cost_per_image": 0.06 + }, + "1024-x-1024/50-steps/stability.stable-diffusion-xl-v1": { + "litellm_provider": "bedrock", + "max_input_tokens": 77, + "max_tokens": 77, + "mode": "image_generation", + "output_cost_per_image": 0.04 + }, + "1024-x-1024/dall-e-2": { + "input_cost_per_pixel": 1.9e-08, + "litellm_provider": "openai", + "mode": "image_generation", + "output_cost_per_pixel": 0.0 + }, + "1024-x-1024/max-steps/stability.stable-diffusion-xl-v1": { + "litellm_provider": "bedrock", + "max_input_tokens": 77, + "max_tokens": 77, + "mode": "image_generation", + "output_cost_per_image": 0.08 + }, + "256-x-256/dall-e-2": { + "input_cost_per_pixel": 2.4414e-07, + "litellm_provider": "openai", + "mode": "image_generation", + "output_cost_per_pixel": 0.0 + }, + "512-x-512/50-steps/stability.stable-diffusion-xl-v0": { + "litellm_provider": "bedrock", + "max_input_tokens": 77, + "max_tokens": 77, + 
"mode": "image_generation", + "output_cost_per_image": 0.018 + }, + "512-x-512/dall-e-2": { + "input_cost_per_pixel": 6.86e-08, + "litellm_provider": "openai", + "mode": "image_generation", + "output_cost_per_pixel": 0.0 + }, + "512-x-512/max-steps/stability.stable-diffusion-xl-v0": { + "litellm_provider": "bedrock", + "max_input_tokens": 77, + "max_tokens": 77, + "mode": "image_generation", + "output_cost_per_image": 0.036 + }, + "ai21.j2-mid-v1": { + "input_cost_per_token": 1.25e-05, + "litellm_provider": "bedrock", + "max_input_tokens": 8191, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_token": 1.25e-05 + }, + "ai21.j2-ultra-v1": { + "input_cost_per_token": 1.88e-05, + "litellm_provider": "bedrock", + "max_input_tokens": 8191, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_token": 1.88e-05 + }, + "ai21.jamba-1-5-large-v1:0": { + "input_cost_per_token": 2e-06, + "litellm_provider": "bedrock", + "max_input_tokens": 256000, + "max_output_tokens": 256000, + "max_tokens": 256000, + "mode": "chat", + "output_cost_per_token": 8e-06 + }, + "ai21.jamba-1-5-mini-v1:0": { + "input_cost_per_token": 2e-07, + "litellm_provider": "bedrock", + "max_input_tokens": 256000, + "max_output_tokens": 256000, + "max_tokens": 256000, + "mode": "chat", + "output_cost_per_token": 4e-07 + }, + "ai21.jamba-instruct-v1:0": { + "input_cost_per_token": 5e-07, + "litellm_provider": "bedrock", + "max_input_tokens": 70000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 7e-07, + "supports_system_messages": true + }, + "aiml/dall-e-2": { + "litellm_provider": "aiml", + "metadata": { + "notes": "DALL-E 2 via AI/ML API - Reliable text-to-image generation" + }, + "mode": "image_generation", + "output_cost_per_image": 0.026, + "source": "https://docs.aimlapi.com/", + "supported_endpoints": [ + "/v1/images/generations" + ] + }, + "aiml/dall-e-3": { + "litellm_provider": 
"aiml", + "metadata": { + "notes": "DALL-E 3 via AI/ML API - High-quality text-to-image generation" + }, + "mode": "image_generation", + "output_cost_per_image": 0.052, + "source": "https://docs.aimlapi.com/", + "supported_endpoints": [ + "/v1/images/generations" + ] + }, + "aiml/flux-pro": { + "litellm_provider": "aiml", + "metadata": { + "notes": "Flux Dev - Development version optimized for experimentation" + }, + "mode": "image_generation", + "output_cost_per_image": 0.065, + "source": "https://docs.aimlapi.com/", + "supported_endpoints": [ + "/v1/images/generations" + ] + }, + "aiml/flux-pro/v1.1": { + "litellm_provider": "aiml", + "mode": "image_generation", + "output_cost_per_image": 0.052, + "supported_endpoints": [ + "/v1/images/generations" + ] + }, + "aiml/flux-pro/v1.1-ultra": { + "litellm_provider": "aiml", + "mode": "image_generation", + "output_cost_per_image": 0.063, + "supported_endpoints": [ + "/v1/images/generations" + ] + }, + "aiml/flux-realism": { + "litellm_provider": "aiml", + "metadata": { + "notes": "Flux Pro - Professional-grade image generation model" + }, + "mode": "image_generation", + "output_cost_per_image": 0.046, + "source": "https://docs.aimlapi.com/", + "supported_endpoints": [ + "/v1/images/generations" + ] + }, + "aiml/flux/dev": { + "litellm_provider": "aiml", + "metadata": { + "notes": "Flux Dev - Development version optimized for experimentation" + }, + "mode": "image_generation", + "output_cost_per_image": 0.033, + "source": "https://docs.aimlapi.com/", + "supported_endpoints": [ + "/v1/images/generations" + ] + }, + "aiml/flux/kontext-max/text-to-image": { + "litellm_provider": "aiml", + "metadata": { + "notes": "Flux Pro v1.1 - Enhanced version with improved capabilities and 6x faster inference speed" + }, + "mode": "image_generation", + "output_cost_per_image": 0.104, + "source": "https://docs.aimlapi.com/", + "supported_endpoints": [ + "/v1/images/generations" + ] + }, + "aiml/flux/kontext-pro/text-to-image": { + 
"litellm_provider": "aiml", + "metadata": { + "notes": "Flux Pro v1.1 - Enhanced version with improved capabilities and 6x faster inference speed" + }, + "mode": "image_generation", + "output_cost_per_image": 0.052, + "source": "https://docs.aimlapi.com/", + "supported_endpoints": [ + "/v1/images/generations" + ] + }, + "aiml/flux/schnell": { + "litellm_provider": "aiml", + "metadata": { + "notes": "Flux Schnell - Fast generation model optimized for speed" + }, + "mode": "image_generation", + "output_cost_per_image": 0.004, + "source": "https://docs.aimlapi.com/", + "supported_endpoints": [ + "/v1/images/generations" + ] + }, + "aiml/google/imagen-4.0-ultra-generate-001": { + "litellm_provider": "aiml", + "metadata": { + "notes": "Imagen 4.0 Ultra Generate API - Photorealistic image generation with precise text rendering" + }, + "mode": "image_generation", + "output_cost_per_image": 0.078, + "source": "https://docs.aimlapi.com/api-references/image-models/google/imagen-4-ultra-generate", + "supported_endpoints": [ + "/v1/images/generations" + ] + }, + "aiml/google/nano-banana-pro": { + "litellm_provider": "aiml", + "metadata": { + "notes": "Gemini 3 Pro Image (Nano Banana Pro) - Advanced text-to-image generation with reasoning and 4K resolution support" + }, + "mode": "image_generation", + "output_cost_per_image": 0.195, + "source": "https://docs.aimlapi.com/api-references/image-models/google/gemini-3-pro-image-preview", + "supported_endpoints": [ + "/v1/images/generations" + ] + }, + "amazon.nova-canvas-v1:0": { + "litellm_provider": "bedrock", + "max_input_tokens": 2600, + "mode": "image_generation", + "output_cost_per_image": 0.06, + "supports_nova_canvas_image_edit": true + }, + "us.amazon.nova-canvas-v1:0": { + "litellm_provider": "bedrock", + "max_input_tokens": 2600, + "mode": "image_generation", + "output_cost_per_image": 0.06, + "supports_nova_canvas_image_edit": true + }, + "us.writer.palmyra-x4-v1:0": { + "input_cost_per_token": 2.5e-06, + 
"litellm_provider": "bedrock_converse", + "max_input_tokens": 128000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 1e-05, + "supports_function_calling": true, + "supports_pdf_input": true + }, + "us.writer.palmyra-x5-v1:0": { + "input_cost_per_token": 6e-07, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 1000000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 6e-06, + "supports_function_calling": true, + "supports_pdf_input": true + }, + "writer.palmyra-x4-v1:0": { + "input_cost_per_token": 2.5e-06, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 128000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 1e-05, + "supports_function_calling": true, + "supports_pdf_input": true + }, + "writer.palmyra-x5-v1:0": { + "input_cost_per_token": 6e-07, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 1000000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 6e-06, + "supports_function_calling": true, + "supports_pdf_input": true + }, + "amazon.nova-lite-v1:0": { + "input_cost_per_token": 6e-08, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 300000, + "max_output_tokens": 10000, + "max_tokens": 10000, + "mode": "chat", + "output_cost_per_token": 2.4e-07, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_vision": true + }, + "amazon.nova-2-lite-v1:0": { + "cache_read_input_token_cost": 7.5e-08, + "input_cost_per_token": 3e-07, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 1000000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 2.5e-06, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": 
true, + "supports_response_schema": true, + "supports_video_input": true, + "supports_vision": true + }, + "amazon.nova-2-pro-preview-20251202-v1:0": { + "cache_read_input_token_cost": 5.46875e-07, + "input_cost_per_token": 2.1875e-06, + "input_cost_per_image_token": 2.1875e-06, + "input_cost_per_audio_token": 2.1875e-06, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 1000000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 1.75e-05, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_video_input": true, + "supports_vision": true + }, + "apac.amazon.nova-2-lite-v1:0": { + "cache_read_input_token_cost": 8.25e-08, + "input_cost_per_token": 3.3e-07, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 1000000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 2.75e-06, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_video_input": true, + "supports_vision": true + }, + "apac.amazon.nova-2-pro-preview-20251202-v1:0": { + "cache_read_input_token_cost": 5.46875e-07, + "input_cost_per_token": 2.1875e-06, + "input_cost_per_image_token": 2.1875e-06, + "input_cost_per_audio_token": 2.1875e-06, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 1000000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 1.75e-05, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_video_input": true, + "supports_vision": true + }, + "eu.amazon.nova-2-lite-v1:0": { + "cache_read_input_token_cost": 8.25e-08, + 
"input_cost_per_token": 3.3e-07, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 1000000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 2.75e-06, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_video_input": true, + "supports_vision": true + }, + "eu.amazon.nova-2-pro-preview-20251202-v1:0": { + "cache_read_input_token_cost": 5.46875e-07, + "input_cost_per_token": 2.1875e-06, + "input_cost_per_image_token": 2.1875e-06, + "input_cost_per_audio_token": 2.1875e-06, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 1000000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 1.75e-05, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_video_input": true, + "supports_vision": true + }, + "us.amazon.nova-2-lite-v1:0": { + "cache_read_input_token_cost": 8.25e-08, + "input_cost_per_token": 3.3e-07, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 1000000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 2.75e-06, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_video_input": true, + "supports_vision": true + }, + "us.amazon.nova-2-pro-preview-20251202-v1:0": { + "cache_read_input_token_cost": 5.46875e-07, + "input_cost_per_token": 2.1875e-06, + "input_cost_per_image_token": 2.1875e-06, + "input_cost_per_audio_token": 2.1875e-06, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 1000000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 
1.75e-05, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_video_input": true, + "supports_vision": true + }, + "amazon.nova-2-multimodal-embeddings-v1:0": { + "litellm_provider": "bedrock", + "max_input_tokens": 8172, + "max_tokens": 8172, + "mode": "embedding", + "input_cost_per_token": 1.35e-07, + "input_cost_per_image": 6e-05, + "input_cost_per_video_per_second": 0.0007, + "input_cost_per_audio_per_second": 0.00014, + "output_cost_per_token": 0.0, + "output_vector_size": 3072, + "source": "https://us-east-1.console.aws.amazon.com/bedrock/home?region=us-east-1#/model-catalog/serverless/amazon.nova-2-multimodal-embeddings-v1:0", + "supports_embedding_image_input": true, + "supports_image_input": true, + "supports_video_input": true, + "supports_audio_input": true + }, + "amazon.nova-micro-v1:0": { + "input_cost_per_token": 3.5e-08, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 128000, + "max_output_tokens": 10000, + "max_tokens": 10000, + "mode": "chat", + "output_cost_per_token": 1.4e-07, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_response_schema": true + }, + "amazon.nova-pro-v1:0": { + "input_cost_per_token": 8e-07, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 300000, + "max_output_tokens": 10000, + "max_tokens": 10000, + "mode": "chat", + "output_cost_per_token": 3.2e-06, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_vision": true + }, + "amazon.rerank-v1:0": { + "input_cost_per_query": 0.001, + "input_cost_per_token": 0.0, + "litellm_provider": "bedrock", + "max_document_chunks_per_query": 100, + "max_input_tokens": 32000, + "max_output_tokens": 32000, + "max_query_tokens": 32000, + "max_tokens": 32000, + "max_tokens_per_document_chunk": 512, + 
"mode": "rerank", + "output_cost_per_token": 0.0 + }, + "amazon.titan-embed-image-v1": { + "input_cost_per_image": 6e-05, + "input_cost_per_token": 8e-07, + "litellm_provider": "bedrock", + "max_input_tokens": 128, + "max_tokens": 128, + "metadata": { + "notes": "'supports_image_input' is a deprecated field. Use 'supports_embedding_image_input' instead." + }, + "mode": "embedding", + "output_cost_per_token": 0.0, + "output_vector_size": 1024, + "source": "https://us-east-1.console.aws.amazon.com/bedrock/home?region=us-east-1#/providers?model=amazon.titan-image-generator-v1", + "supports_embedding_image_input": true, + "supports_image_input": true + }, + "amazon.titan-embed-text-v1": { + "input_cost_per_token": 1e-07, + "litellm_provider": "bedrock", + "max_input_tokens": 8192, + "max_tokens": 8192, + "mode": "embedding", + "output_cost_per_token": 0.0, + "output_vector_size": 1536 + }, + "amazon.titan-embed-text-v2:0": { + "input_cost_per_token": 2e-07, + "litellm_provider": "bedrock", + "max_input_tokens": 8192, + "max_tokens": 8192, + "mode": "embedding", + "output_cost_per_token": 0.0, + "output_vector_size": 1024 + }, + "amazon.titan-image-generator-v1": { + "input_cost_per_image": 0.0, + "output_cost_per_image": 0.008, + "output_cost_per_image_premium_image": 0.01, + "output_cost_per_image_above_512_and_512_pixels": 0.01, + "output_cost_per_image_above_512_and_512_pixels_and_premium_image": 0.012, + "litellm_provider": "bedrock", + "mode": "image_generation" + }, + "amazon.titan-image-generator-v2": { + "input_cost_per_image": 0.0, + "output_cost_per_image": 0.008, + "output_cost_per_image_premium_image": 0.01, + "output_cost_per_image_above_1024_and_1024_pixels": 0.01, + "output_cost_per_image_above_1024_and_1024_pixels_and_premium_image": 0.012, + "litellm_provider": "bedrock", + "mode": "image_generation" + }, + "amazon.titan-image-generator-v2:0": { + "input_cost_per_image": 0.0, + "output_cost_per_image": 0.008, + "output_cost_per_image_premium_image": 
0.01, + "output_cost_per_image_above_1024_and_1024_pixels": 0.01, + "output_cost_per_image_above_1024_and_1024_pixels_and_premium_image": 0.012, + "litellm_provider": "bedrock", + "mode": "image_generation" + }, + "twelvelabs.marengo-embed-2-7-v1:0": { + "input_cost_per_token": 7e-05, + "litellm_provider": "bedrock", + "max_input_tokens": 77, + "max_tokens": 77, + "mode": "embedding", + "output_cost_per_token": 0.0, + "output_vector_size": 1024, + "supports_embedding_image_input": true, + "supports_image_input": true + }, + "us.twelvelabs.marengo-embed-2-7-v1:0": { + "input_cost_per_token": 7e-05, + "input_cost_per_video_per_second": 0.0007, + "input_cost_per_audio_per_second": 0.00014, + "input_cost_per_image": 0.0001, + "litellm_provider": "bedrock", + "max_input_tokens": 77, + "max_tokens": 77, + "mode": "embedding", + "output_cost_per_token": 0.0, + "output_vector_size": 1024, + "supports_embedding_image_input": true, + "supports_image_input": true + }, + "eu.twelvelabs.marengo-embed-2-7-v1:0": { + "input_cost_per_token": 7e-05, + "input_cost_per_video_per_second": 0.0007, + "input_cost_per_audio_per_second": 0.00014, + "input_cost_per_image": 0.0001, + "litellm_provider": "bedrock", + "max_input_tokens": 77, + "max_tokens": 77, + "mode": "embedding", + "output_cost_per_token": 0.0, + "output_vector_size": 1024, + "supports_embedding_image_input": true, + "supports_image_input": true + }, + "twelvelabs.pegasus-1-2-v1:0": { + "input_cost_per_video_per_second": 0.00049, + "output_cost_per_token": 7.5e-06, + "litellm_provider": "bedrock", + "mode": "chat", + "supports_video_input": true + }, + "us.twelvelabs.pegasus-1-2-v1:0": { + "input_cost_per_video_per_second": 0.00049, + "output_cost_per_token": 7.5e-06, + "litellm_provider": "bedrock", + "mode": "chat", + "supports_video_input": true + }, + "eu.twelvelabs.pegasus-1-2-v1:0": { + "input_cost_per_video_per_second": 0.00049, + "output_cost_per_token": 7.5e-06, + "litellm_provider": "bedrock", + "mode": "chat", + 
"supports_video_input": true + }, + "amazon.titan-text-express-v1": { + "input_cost_per_token": 1.3e-06, + "litellm_provider": "bedrock", + "max_input_tokens": 42000, + "max_output_tokens": 8000, + "max_tokens": 8000, + "mode": "chat", + "output_cost_per_token": 1.7e-06 + }, + "amazon.titan-text-lite-v1": { + "input_cost_per_token": 3e-07, + "litellm_provider": "bedrock", + "max_input_tokens": 42000, + "max_output_tokens": 4000, + "max_tokens": 4000, + "mode": "chat", + "output_cost_per_token": 4e-07 + }, + "amazon.titan-text-premier-v1:0": { + "input_cost_per_token": 5e-07, + "litellm_provider": "bedrock", + "max_input_tokens": 42000, + "max_output_tokens": 32000, + "max_tokens": 32000, + "mode": "chat", + "output_cost_per_token": 1.5e-06 + }, + "anthropic.claude-3-5-haiku-20241022-v1:0": { + "cache_creation_input_token_cost": 1e-06, + "cache_read_input_token_cost": 8e-08, + "input_cost_per_token": 8e-07, + "litellm_provider": "bedrock", + "max_input_tokens": 200000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 4e-06, + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "anthropic.claude-haiku-4-5-20251001-v1:0": { + "cache_creation_input_token_cost": 1.25e-06, + "cache_creation_input_token_cost_above_1hr": 2e-06, + "cache_read_input_token_cost": 1e-07, + "input_cost_per_token": 1e-06, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 5e-06, + "source": "https://aws.amazon.com/about-aws/whats-new/2025/10/claude-4-5-haiku-anthropic-amazon-bedrock", + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + 
"supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 346, + "supports_native_structured_output": true + }, + "anthropic.claude-haiku-4-5@20251001": { + "cache_creation_input_token_cost": 1.25e-06, + "cache_creation_input_token_cost_above_1hr": 2e-06, + "cache_read_input_token_cost": 1e-07, + "input_cost_per_token": 1e-06, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 5e-06, + "source": "https://aws.amazon.com/about-aws/whats-new/2025/10/claude-4-5-haiku-anthropic-amazon-bedrock", + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 346, + "supports_native_streaming": true, + "supports_native_structured_output": true + }, + "anthropic.claude-3-5-sonnet-20240620-v1:0": { + "input_cost_per_token": 3e-06, + "litellm_provider": "bedrock", + "max_input_tokens": 1000000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 1.5e-05, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "input_cost_per_token_above_200k_tokens": 6e-06, + "output_cost_per_token_above_200k_tokens": 3e-05, + "cache_creation_input_token_cost_above_200k_tokens": 7.5e-06, + "cache_read_input_token_cost_above_200k_tokens": 6e-07, + "cache_creation_input_token_cost_above_1hr": 7.5e-06, + "cache_creation_input_token_cost_above_1hr_above_200k_tokens": 1.5e-05, + "cache_creation_input_token_cost": 3.75e-06, + "cache_read_input_token_cost": 3e-07 + }, + 
"anthropic.claude-3-5-sonnet-20241022-v2:0": { + "cache_creation_input_token_cost": 3.75e-06, + "cache_read_input_token_cost": 3e-07, + "input_cost_per_token": 3e-06, + "litellm_provider": "bedrock", + "max_input_tokens": 1000000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 1.5e-05, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "input_cost_per_token_above_200k_tokens": 6e-06, + "output_cost_per_token_above_200k_tokens": 3e-05, + "cache_creation_input_token_cost_above_200k_tokens": 7.5e-06, + "cache_read_input_token_cost_above_200k_tokens": 6e-07, + "cache_creation_input_token_cost_above_1hr": 7.5e-06, + "cache_creation_input_token_cost_above_1hr_above_200k_tokens": 1.5e-05 + }, + "anthropic.claude-3-7-sonnet-20240620-v1:0": { + "cache_creation_input_token_cost": 4.5e-06, + "cache_read_input_token_cost": 3.6e-07, + "input_cost_per_token": 3.6e-06, + "litellm_provider": "bedrock", + "max_input_tokens": 200000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 1.8e-05, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "anthropic.claude-3-7-sonnet-20250219-v1:0": { + "cache_creation_input_token_cost": 3.75e-06, + "cache_read_input_token_cost": 3e-07, + "input_cost_per_token": 3e-06, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 200000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 1.5e-05, + "supports_assistant_prefill": true, + 
"supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "anthropic.claude-3-haiku-20240307-v1:0": { + "input_cost_per_token": 2.5e-07, + "litellm_provider": "bedrock", + "max_input_tokens": 200000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 1.25e-06, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "cache_read_input_token_cost": 2.5e-08, + "cache_creation_input_token_cost": 3.125e-07 + }, + "anthropic.claude-3-opus-20240229-v1:0": { + "input_cost_per_token": 1.5e-05, + "litellm_provider": "bedrock", + "max_input_tokens": 200000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 7.5e-05, + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "cache_read_input_token_cost": 1.5e-06, + "cache_creation_input_token_cost": 1.875e-05 + }, + "anthropic.claude-3-sonnet-20240229-v1:0": { + "input_cost_per_token": 3e-06, + "litellm_provider": "bedrock", + "max_input_tokens": 200000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 1.5e-05, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "cache_read_input_token_cost": 3e-07, + "cache_creation_input_token_cost": 3.75e-06 + }, + "anthropic.claude-instant-v1": { + "input_cost_per_token": 8e-07, + "litellm_provider": "bedrock", + "max_input_tokens": 100000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_token": 2.4e-06, + 
"supports_tool_choice": true + }, + "anthropic.claude-opus-4-1-20250805-v1:0": { + "cache_creation_input_token_cost": 1.875e-05, + "cache_read_input_token_cost": 1.5e-06, + "input_cost_per_token": 1.5e-05, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 200000, + "max_output_tokens": 32000, + "max_tokens": 32000, + "mode": "chat", + "output_cost_per_token": 7.5e-05, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 159 + }, + "anthropic.claude-opus-4-20250514-v1:0": { + "cache_creation_input_token_cost": 1.875e-05, + "cache_read_input_token_cost": 1.5e-06, + "input_cost_per_token": 1.5e-05, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 200000, + "max_output_tokens": 32000, + "max_tokens": 32000, + "mode": "chat", + "output_cost_per_token": 7.5e-05, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 159 + }, + "anthropic.claude-opus-4-5-20251101-v1:0": { + "cache_creation_input_token_cost": 6.25e-06, + "cache_creation_input_token_cost_above_1hr": 1e-05, + "cache_read_input_token_cost": 5e-07, + "input_cost_per_token": 5e-06, + "litellm_provider": "bedrock_converse", + 
"max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 2.5e-05, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_minimal_reasoning_effort": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 159, + "supports_native_structured_output": true + }, + "anthropic.claude-opus-4-6-v1": { + "cache_creation_input_token_cost": 6.25e-06, + "cache_creation_input_token_cost_above_1hr": 1e-05, + "cache_read_input_token_cost": 5e-07, + "input_cost_per_token": 5e-06, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 1000000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 2.5e-05, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": false, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 346, + "supports_native_structured_output": true, + "supports_max_reasoning_effort": true, + "supports_minimal_reasoning_effort": true + }, + "global.anthropic.claude-opus-4-6-v1": { + "cache_creation_input_token_cost": 6.25e-06, + "cache_creation_input_token_cost_above_1hr": 1e-05, + "cache_read_input_token_cost": 5e-07, + "input_cost_per_token": 5e-06, + "litellm_provider": "bedrock_converse", + 
"max_input_tokens": 1000000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 2.5e-05, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": false, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 346, + "supports_native_structured_output": true, + "supports_max_reasoning_effort": true, + "supports_minimal_reasoning_effort": true + }, + "us.anthropic.claude-opus-4-6-v1": { + "cache_creation_input_token_cost": 6.875e-06, + "cache_creation_input_token_cost_above_1hr": 1.1e-05, + "cache_read_input_token_cost": 5.5e-07, + "input_cost_per_token": 5.5e-06, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 1000000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 2.75e-05, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": false, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 346, + "supports_native_structured_output": true, + "supports_max_reasoning_effort": true, + "supports_minimal_reasoning_effort": true + }, + "eu.anthropic.claude-opus-4-6-v1": { + "cache_creation_input_token_cost": 6.875e-06, + "cache_read_input_token_cost": 5.5e-07, + "input_cost_per_token": 5.5e-06, + "litellm_provider": "bedrock_converse", + 
"max_input_tokens": 1000000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 2.75e-05, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": false, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 346, + "supports_native_structured_output": true, + "supports_max_reasoning_effort": true, + "supports_minimal_reasoning_effort": true + }, + "au.anthropic.claude-opus-4-6-v1": { + "cache_creation_input_token_cost": 6.875e-06, + "cache_read_input_token_cost": 5.5e-07, + "input_cost_per_token": 5.5e-06, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 1000000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 2.75e-05, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": false, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 346, + "supports_native_structured_output": true, + "supports_max_reasoning_effort": true, + "supports_minimal_reasoning_effort": true + }, + "anthropic.claude-opus-4-7": { + "cache_creation_input_token_cost": 6.25e-06, + "cache_creation_input_token_cost_above_1hr": 1e-05, + "cache_read_input_token_cost": 5e-07, + "input_cost_per_token": 5e-06, + "litellm_provider": "bedrock_converse", + 
"max_input_tokens": 1000000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 2.5e-05, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": false, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_xhigh_reasoning_effort": true, + "tool_use_system_prompt_tokens": 346, + "supports_native_structured_output": true, + "supports_max_reasoning_effort": true, + "supports_minimal_reasoning_effort": true + }, + "anthropic.claude-mythos-preview": { + "input_cost_per_token": 0, + "output_cost_per_token": 0, + "litellm_provider": "bedrock", + "max_input_tokens": 1000000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "supports_function_calling": true, + "supports_vision": true, + "supports_prompt_caching": false, + "supports_reasoning": true, + "supports_minimal_reasoning_effort": true, + "supports_tool_choice": true + }, + "global.anthropic.claude-opus-4-7": { + "cache_creation_input_token_cost": 6.25e-06, + "cache_creation_input_token_cost_above_1hr": 1e-05, + "cache_read_input_token_cost": 5e-07, + "input_cost_per_token": 5e-06, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 1000000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 2.5e-05, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": false, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": 
true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_xhigh_reasoning_effort": true, + "tool_use_system_prompt_tokens": 346, + "supports_native_structured_output": true, + "supports_max_reasoning_effort": true, + "supports_minimal_reasoning_effort": true + }, + "us.anthropic.claude-opus-4-7": { + "cache_creation_input_token_cost": 6.875e-06, + "cache_creation_input_token_cost_above_1hr": 1.1e-05, + "cache_read_input_token_cost": 5.5e-07, + "input_cost_per_token": 5.5e-06, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 1000000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 2.75e-05, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": false, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_xhigh_reasoning_effort": true, + "tool_use_system_prompt_tokens": 346, + "supports_native_structured_output": true, + "supports_max_reasoning_effort": true, + "supports_minimal_reasoning_effort": true + }, + "eu.anthropic.claude-opus-4-7": { + "cache_creation_input_token_cost": 6.875e-06, + "cache_read_input_token_cost": 5.5e-07, + "input_cost_per_token": 5.5e-06, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 1000000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 2.75e-05, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": false, + "supports_computer_use": true, + "supports_function_calling": true, + 
"supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_xhigh_reasoning_effort": true, + "tool_use_system_prompt_tokens": 346, + "supports_native_structured_output": true, + "supports_max_reasoning_effort": true, + "supports_minimal_reasoning_effort": true + }, + "au.anthropic.claude-opus-4-7": { + "cache_creation_input_token_cost": 6.875e-06, + "cache_read_input_token_cost": 5.5e-07, + "input_cost_per_token": 5.5e-06, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 1000000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 2.75e-05, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": false, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_xhigh_reasoning_effort": true, + "tool_use_system_prompt_tokens": 346, + "supports_native_structured_output": true, + "supports_max_reasoning_effort": true, + "supports_minimal_reasoning_effort": true + }, + "anthropic.claude-sonnet-4-6": { + "cache_creation_input_token_cost": 3.75e-06, + "cache_creation_input_token_cost_above_1hr": 6e-06, + "cache_read_input_token_cost": 3e-07, + "input_cost_per_token": 3e-06, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 1000000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 1.5e-05, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": true, + 
"supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_max_reasoning_effort": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 346, + "supports_native_structured_output": true, + "supports_minimal_reasoning_effort": true + }, + "global.anthropic.claude-sonnet-4-6": { + "cache_creation_input_token_cost": 3.75e-06, + "cache_creation_input_token_cost_above_1hr": 6e-06, + "cache_read_input_token_cost": 3e-07, + "input_cost_per_token": 3e-06, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 1000000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 1.5e-05, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_max_reasoning_effort": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 346, + "supports_native_structured_output": true, + "supports_minimal_reasoning_effort": true + }, + "us.anthropic.claude-sonnet-4-6": { + "cache_creation_input_token_cost": 4.125e-06, + "cache_creation_input_token_cost_above_1hr": 6.6e-06, + "cache_read_input_token_cost": 3.3e-07, + "input_cost_per_token": 3.3e-06, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 1000000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 1.65e-05, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + 
}, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_max_reasoning_effort": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 346, + "supports_native_structured_output": true, + "supports_minimal_reasoning_effort": true + }, + "eu.anthropic.claude-sonnet-4-6": { + "cache_creation_input_token_cost": 4.125e-06, + "cache_read_input_token_cost": 3.3e-07, + "input_cost_per_token": 3.3e-06, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 1000000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 1.65e-05, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_max_reasoning_effort": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 346, + "supports_native_structured_output": true, + "supports_minimal_reasoning_effort": true + }, + "au.anthropic.claude-sonnet-4-6": { + "cache_creation_input_token_cost": 4.125e-06, + "cache_read_input_token_cost": 3.3e-07, + "input_cost_per_token": 3.3e-06, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 1000000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 1.65e-05, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": true, + 
"supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_max_reasoning_effort": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 346, + "supports_native_structured_output": true, + "supports_minimal_reasoning_effort": true + }, + "anthropic.claude-sonnet-4-20250514-v1:0": { + "cache_creation_input_token_cost": 3.75e-06, + "cache_read_input_token_cost": 3e-07, + "input_cost_per_token": 3e-06, + "input_cost_per_token_above_200k_tokens": 6e-06, + "output_cost_per_token_above_200k_tokens": 2.25e-05, + "cache_creation_input_token_cost_above_200k_tokens": 7.5e-06, + "cache_read_input_token_cost_above_200k_tokens": 6e-07, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 1000000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 1.5e-05, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 159 + }, + "anthropic.claude-sonnet-4-5-20250929-v1:0": { + "cache_creation_input_token_cost": 3.75e-06, + "cache_creation_input_token_cost_above_1hr": 6e-06, + "cache_read_input_token_cost": 3e-07, + "input_cost_per_token": 3e-06, + "input_cost_per_token_above_200k_tokens": 6e-06, + "output_cost_per_token_above_200k_tokens": 2.25e-05, + "cache_creation_input_token_cost_above_200k_tokens": 7.5e-06, + "cache_creation_input_token_cost_above_1hr_above_200k_tokens": 1.2e-05, + 
"cache_read_input_token_cost_above_200k_tokens": 6e-07, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 1.5e-05, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 159, + "supports_native_structured_output": true + }, + "anthropic.claude-v1": { + "input_cost_per_token": 8e-06, + "litellm_provider": "bedrock", + "max_input_tokens": 100000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_token": 2.4e-05 + }, + "anthropic.claude-v2:1": { + "input_cost_per_token": 8e-06, + "litellm_provider": "bedrock", + "max_input_tokens": 100000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_token": 2.4e-05, + "supports_tool_choice": true + }, + "anyscale/HuggingFaceH4/zephyr-7b-beta": { + "input_cost_per_token": 1.5e-07, + "litellm_provider": "anyscale", + "max_input_tokens": 16384, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 1.5e-07 + }, + "anyscale/codellama/CodeLlama-34b-Instruct-hf": { + "input_cost_per_token": 1e-06, + "litellm_provider": "anyscale", + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 1e-06 + }, + "anyscale/codellama/CodeLlama-70b-Instruct-hf": { + "input_cost_per_token": 1e-06, + "litellm_provider": "anyscale", + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + 
"output_cost_per_token": 1e-06, + "source": "https://docs.anyscale.com/preview/endpoints/text-generation/supported-models/codellama-CodeLlama-70b-Instruct-hf" + }, + "anyscale/google/gemma-7b-it": { + "input_cost_per_token": 1.5e-07, + "litellm_provider": "anyscale", + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 1.5e-07, + "source": "https://docs.anyscale.com/preview/endpoints/text-generation/supported-models/google-gemma-7b-it" + }, + "anyscale/meta-llama/Llama-2-13b-chat-hf": { + "input_cost_per_token": 2.5e-07, + "litellm_provider": "anyscale", + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 2.5e-07 + }, + "anyscale/meta-llama/Llama-2-70b-chat-hf": { + "input_cost_per_token": 1e-06, + "litellm_provider": "anyscale", + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 1e-06 + }, + "anyscale/meta-llama/Llama-2-7b-chat-hf": { + "input_cost_per_token": 1.5e-07, + "litellm_provider": "anyscale", + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 1.5e-07 + }, + "anyscale/meta-llama/Meta-Llama-3-70B-Instruct": { + "input_cost_per_token": 1e-06, + "litellm_provider": "anyscale", + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 1e-06, + "source": "https://docs.anyscale.com/preview/endpoints/text-generation/supported-models/meta-llama-Meta-Llama-3-70B-Instruct" + }, + "anyscale/meta-llama/Meta-Llama-3-8B-Instruct": { + "input_cost_per_token": 1.5e-07, + "litellm_provider": "anyscale", + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 1.5e-07, + "source": 
"https://docs.anyscale.com/preview/endpoints/text-generation/supported-models/meta-llama-Meta-Llama-3-8B-Instruct" + }, + "anyscale/mistralai/Mistral-7B-Instruct-v0.1": { + "input_cost_per_token": 1.5e-07, + "litellm_provider": "anyscale", + "max_input_tokens": 16384, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 1.5e-07, + "source": "https://docs.anyscale.com/preview/endpoints/text-generation/supported-models/mistralai-Mistral-7B-Instruct-v0.1", + "supports_function_calling": true + }, + "anyscale/mistralai/Mixtral-8x22B-Instruct-v0.1": { + "input_cost_per_token": 9e-07, + "litellm_provider": "anyscale", + "max_input_tokens": 65536, + "max_output_tokens": 65536, + "max_tokens": 65536, + "mode": "chat", + "output_cost_per_token": 9e-07, + "source": "https://docs.anyscale.com/preview/endpoints/text-generation/supported-models/mistralai-Mixtral-8x22B-Instruct-v0.1", + "supports_function_calling": true + }, + "anyscale/mistralai/Mixtral-8x7B-Instruct-v0.1": { + "input_cost_per_token": 1.5e-07, + "litellm_provider": "anyscale", + "max_input_tokens": 16384, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 1.5e-07, + "source": "https://docs.anyscale.com/preview/endpoints/text-generation/supported-models/mistralai-Mixtral-8x7B-Instruct-v0.1", + "supports_function_calling": true + }, + "apac.amazon.nova-lite-v1:0": { + "input_cost_per_token": 6.3e-08, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 300000, + "max_output_tokens": 10000, + "max_tokens": 10000, + "mode": "chat", + "output_cost_per_token": 2.52e-07, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_vision": true + }, + "apac.amazon.nova-micro-v1:0": { + "input_cost_per_token": 3.7e-08, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 128000, + "max_output_tokens": 10000, + 
"max_tokens": 10000, + "mode": "chat", + "output_cost_per_token": 1.48e-07, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_response_schema": true + }, + "apac.amazon.nova-pro-v1:0": { + "input_cost_per_token": 8.4e-07, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 300000, + "max_output_tokens": 10000, + "max_tokens": 10000, + "mode": "chat", + "output_cost_per_token": 3.36e-06, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_vision": true + }, + "apac.anthropic.claude-3-5-sonnet-20240620-v1:0": { + "input_cost_per_token": 3e-06, + "litellm_provider": "bedrock", + "max_input_tokens": 200000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 1.5e-05, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "cache_read_input_token_cost": 3e-07, + "cache_creation_input_token_cost": 3.75e-06 + }, + "apac.anthropic.claude-3-5-sonnet-20241022-v2:0": { + "cache_creation_input_token_cost": 3.75e-06, + "cache_read_input_token_cost": 3e-07, + "input_cost_per_token": 3e-06, + "litellm_provider": "bedrock", + "max_input_tokens": 200000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 1.5e-05, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "apac.anthropic.claude-3-haiku-20240307-v1:0": { + "input_cost_per_token": 2.5e-07, + "litellm_provider": "bedrock", + "max_input_tokens": 200000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 1.25e-06, + 
"supports_function_calling": true, + "supports_pdf_input": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "cache_read_input_token_cost": 2.5e-08, + "cache_creation_input_token_cost": 3.125e-07 + }, + "apac.anthropic.claude-haiku-4-5-20251001-v1:0": { + "cache_creation_input_token_cost": 1.375e-06, + "cache_read_input_token_cost": 1.1e-07, + "input_cost_per_token": 1.1e-06, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 5.5e-06, + "source": "https://aws.amazon.com/about-aws/whats-new/2025/10/claude-4-5-haiku-anthropic-amazon-bedrock", + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 346, + "supports_native_structured_output": true + }, + "apac.anthropic.claude-3-sonnet-20240229-v1:0": { + "input_cost_per_token": 3e-06, + "litellm_provider": "bedrock", + "max_input_tokens": 200000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 1.5e-05, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "cache_read_input_token_cost": 3e-07, + "cache_creation_input_token_cost": 3.75e-06 + }, + "apac.anthropic.claude-sonnet-4-20250514-v1:0": { + "cache_creation_input_token_cost": 3.75e-06, + "cache_read_input_token_cost": 3e-07, + "input_cost_per_token": 3e-06, + "input_cost_per_token_above_200k_tokens": 6e-06, + "output_cost_per_token_above_200k_tokens": 2.25e-05, + "cache_creation_input_token_cost_above_200k_tokens": 7.5e-06, + 
"cache_read_input_token_cost_above_200k_tokens": 6e-07, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 1000000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 1.5e-05, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 159 + }, + "assemblyai/best": { + "input_cost_per_second": 3.333e-05, + "litellm_provider": "assemblyai", + "mode": "audio_transcription", + "output_cost_per_second": 0.0 + }, + "assemblyai/nano": { + "input_cost_per_second": 0.00010278, + "litellm_provider": "assemblyai", + "mode": "audio_transcription", + "output_cost_per_second": 0.0 + }, + "au.anthropic.claude-sonnet-4-5-20250929-v1:0": { + "cache_creation_input_token_cost": 4.125e-06, + "cache_read_input_token_cost": 3.3e-07, + "input_cost_per_token": 3.3e-06, + "input_cost_per_token_above_200k_tokens": 6.6e-06, + "output_cost_per_token_above_200k_tokens": 2.475e-05, + "cache_creation_input_token_cost_above_200k_tokens": 8.25e-06, + "cache_read_input_token_cost_above_200k_tokens": 6.6e-07, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 1.65e-05, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + 
"supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 346, + "supports_native_structured_output": true + }, + "azure/ada": { + "input_cost_per_token": 1e-07, + "litellm_provider": "azure", + "max_input_tokens": 8191, + "max_tokens": 8191, + "mode": "embedding", + "output_cost_per_token": 0.0 + }, + "azure/codex-mini": { + "cache_read_input_token_cost": 3.75e-07, + "input_cost_per_token": 1.5e-06, + "litellm_provider": "azure", + "max_input_tokens": 200000, + "max_output_tokens": 100000, + "max_tokens": 100000, + "mode": "responses", + "output_cost_per_token": 6e-06, + "supported_endpoints": [ + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/command-r-plus": { + "input_cost_per_token": 3e-06, + "litellm_provider": "azure", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 1.5e-05, + "supports_function_calling": true + }, + "azure_ai/claude-haiku-4-5": { + "cache_creation_input_token_cost": 1.25e-06, + "cache_creation_input_token_cost_above_1hr": 2e-06, + "cache_read_input_token_cost": 1e-07, + "input_cost_per_token": 1e-06, + "litellm_provider": "azure_ai", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 5e-06, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + 
"supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure_ai/claude-opus-4-5": { + "cache_creation_input_token_cost": 6.25e-06, + "cache_creation_input_token_cost_above_1hr": 1e-05, + "cache_read_input_token_cost": 5e-07, + "input_cost_per_token": 5e-06, + "litellm_provider": "azure_ai", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 2.5e-05, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_minimal_reasoning_effort": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure_ai/claude-opus-4-6": { + "input_cost_per_token": 5e-06, + "output_cost_per_token": 2.5e-05, + "litellm_provider": "azure_ai", + "max_input_tokens": 200000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "cache_creation_input_token_cost": 6.25e-06, + "cache_creation_input_token_cost_above_1hr": 1e-05, + "cache_read_input_token_cost": 5e-07, + "supports_assistant_prefill": false, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 159, + "supports_max_reasoning_effort": true, + "supports_minimal_reasoning_effort": true + }, + "azure_ai/claude-opus-4-7": { + "input_cost_per_token": 5e-06, + "output_cost_per_token": 2.5e-05, + "litellm_provider": "azure_ai", + "max_input_tokens": 200000, + "max_output_tokens": 128000, + "max_tokens": 128000, 
+ "mode": "chat", + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "cache_creation_input_token_cost": 6.25e-06, + "cache_creation_input_token_cost_above_1hr": 1e-05, + "cache_read_input_token_cost": 5e-07, + "supports_assistant_prefill": false, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_xhigh_reasoning_effort": true, + "tool_use_system_prompt_tokens": 159, + "supports_max_reasoning_effort": true, + "supports_minimal_reasoning_effort": true + }, + "azure_ai/claude-opus-4-1": { + "cache_creation_input_token_cost": 1.875e-05, + "cache_creation_input_token_cost_above_1hr": 3e-05, + "cache_read_input_token_cost": 1.5e-06, + "input_cost_per_token": 1.5e-05, + "litellm_provider": "azure_ai", + "max_input_tokens": 200000, + "max_output_tokens": 32000, + "max_tokens": 32000, + "mode": "chat", + "output_cost_per_token": 7.5e-05, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure_ai/claude-sonnet-4-5": { + "cache_creation_input_token_cost": 3.75e-06, + "cache_creation_input_token_cost_above_1hr": 6e-06, + "cache_read_input_token_cost": 3e-07, + "input_cost_per_token": 3e-06, + "litellm_provider": "azure_ai", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 1.5e-05, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + 
"supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure_ai/claude-sonnet-4-6": { + "cache_creation_input_token_cost": 3.75e-06, + "cache_creation_input_token_cost_above_1hr": 6e-06, + "cache_read_input_token_cost": 3e-07, + "input_cost_per_token": 3e-06, + "litellm_provider": "azure_ai", + "max_input_tokens": 1000000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 1.5e-05, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_max_reasoning_effort": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 346, + "supports_minimal_reasoning_effort": true + }, + "azure/computer-use-preview": { + "input_cost_per_token": 3e-06, + "litellm_provider": "azure", + "max_input_tokens": 8192, + "max_output_tokens": 1024, + "max_tokens": 1024, + "mode": "chat", + "output_cost_per_token": 1.2e-05, + "supported_endpoints": [ + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": false, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/container": { + "code_interpreter_cost_per_session": 0.03, + "litellm_provider": "azure", + "mode": "chat" + }, + "azure_ai/gpt-oss-120b": { + "input_cost_per_token": 1.5e-07, + "output_cost_per_token": 6e-07, + "litellm_provider": "azure_ai", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + 
"source": "https://azure.microsoft.com/en-us/pricing/details/cognitive-services/openai-service/", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "azure_ai/model_router": { + "input_cost_per_token": 1.4e-07, + "output_cost_per_token": 0, + "litellm_provider": "azure_ai", + "mode": "chat", + "source": "https://azure.microsoft.com/en-us/pricing/details/ai-services/", + "comment": "Flat cost of $0.14 per M input tokens for Azure AI Foundry Model Router infrastructure. Use pattern: azure_ai/model_router/ where deployment-name is your Azure deployment (e.g., azure-model-router)" + }, + "azure/eu/gpt-4o-2024-08-06": { + "deprecation_date": "2026-02-27", + "cache_read_input_token_cost": 1.375e-06, + "input_cost_per_token": 2.75e-06, + "litellm_provider": "azure", + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 1.1e-05, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/eu/gpt-4o-2024-11-20": { + "deprecation_date": "2026-03-01", + "cache_creation_input_token_cost": 1.38e-06, + "input_cost_per_token": 2.75e-06, + "litellm_provider": "azure", + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 1.1e-05, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/eu/gpt-4o-mini-2024-07-18": { + "cache_read_input_token_cost": 8.3e-08, + "input_cost_per_token": 1.65e-07, + "litellm_provider": "azure", + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + 
"output_cost_per_token": 6.6e-07, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/eu/gpt-4o-mini-realtime-preview-2024-12-17": { + "cache_creation_input_audio_token_cost": 3.3e-07, + "cache_read_input_token_cost": 3.3e-07, + "input_cost_per_audio_token": 1.1e-05, + "input_cost_per_token": 6.6e-07, + "litellm_provider": "azure", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_audio_token": 2.2e-05, + "output_cost_per_token": 2.64e-06, + "supports_audio_input": true, + "supports_audio_output": true, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "azure/eu/gpt-4o-realtime-preview-2024-10-01": { + "cache_creation_input_audio_token_cost": 2.2e-05, + "cache_read_input_token_cost": 2.75e-06, + "input_cost_per_audio_token": 0.00011, + "input_cost_per_token": 5.5e-06, + "litellm_provider": "azure", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_audio_token": 0.00022, + "output_cost_per_token": 2.2e-05, + "supports_audio_input": true, + "supports_audio_output": true, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "azure/eu/gpt-4o-realtime-preview-2024-12-17": { + "cache_read_input_audio_token_cost": 2.5e-06, + "cache_read_input_token_cost": 2.75e-06, + "input_cost_per_audio_token": 4.4e-05, + "input_cost_per_token": 5.5e-06, + "litellm_provider": "azure", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_audio_token": 8e-05, + "output_cost_per_token": 2.2e-05, + 
"supported_modalities": [ + "text", + "audio" + ], + "supported_output_modalities": [ + "text", + "audio" + ], + "supports_audio_input": true, + "supports_audio_output": true, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "azure/eu/gpt-5-2025-08-07": { + "cache_read_input_token_cost": 1.375e-07, + "input_cost_per_token": 1.375e-06, + "litellm_provider": "azure", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 1.1e-05, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/eu/gpt-5-mini-2025-08-07": { + "cache_read_input_token_cost": 2.75e-08, + "input_cost_per_token": 2.75e-07, + "litellm_provider": "azure", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 2.2e-06, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + 
"azure/eu/gpt-5.1": { + "cache_read_input_token_cost": 1.4e-07, + "input_cost_per_token": 1.38e-06, + "litellm_provider": "azure", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 1.1e-05, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text", + "image" + ], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_none_reasoning_effort": true + }, + "azure/eu/gpt-5.1-chat": { + "cache_read_input_token_cost": 1.4e-07, + "input_cost_per_token": 1.38e-06, + "litellm_provider": "azure", + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 1.1e-05, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text", + "image" + ], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_none_reasoning_effort": true + }, + "azure/eu/gpt-5.1-codex": { + "cache_read_input_token_cost": 1.4e-07, + "input_cost_per_token": 1.38e-06, + "litellm_provider": "azure", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "responses", + 
"output_cost_per_token": 1.1e-05, + "supported_endpoints": [ + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": false, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/eu/gpt-5.1-codex-mini": { + "cache_read_input_token_cost": 2.8e-08, + "input_cost_per_token": 2.75e-07, + "litellm_provider": "azure", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "responses", + "output_cost_per_token": 2.2e-06, + "supported_endpoints": [ + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": false, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/eu/gpt-5-nano-2025-08-07": { + "cache_read_input_token_cost": 5.5e-09, + "input_cost_per_token": 5.5e-08, + "litellm_provider": "azure", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 4.4e-07, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + 
"supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/eu/o1-2024-12-17": { + "cache_read_input_token_cost": 8.25e-06, + "input_cost_per_token": 1.65e-05, + "litellm_provider": "azure", + "max_input_tokens": 200000, + "max_output_tokens": 100000, + "max_tokens": 100000, + "mode": "chat", + "output_cost_per_token": 6.6e-05, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/eu/o1-mini-2024-09-12": { + "cache_read_input_token_cost": 6.05e-07, + "input_cost_per_token": 1.21e-06, + "input_cost_per_token_batches": 6.05e-07, + "litellm_provider": "azure", + "max_input_tokens": 128000, + "max_output_tokens": 65536, + "max_tokens": 65536, + "mode": "chat", + "output_cost_per_token": 4.84e-06, + "output_cost_per_token_batches": 2.42e-06, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": true, + "supports_vision": false + }, + "azure/eu/o1-preview-2024-09-12": { + "cache_read_input_token_cost": 8.25e-06, + "input_cost_per_token": 1.65e-05, + "litellm_provider": "azure", + "max_input_tokens": 128000, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 6.6e-05, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": true, + "supports_vision": false + }, + "azure/eu/o3-mini-2025-01-31": { + "cache_read_input_token_cost": 6.05e-07, + "input_cost_per_token": 1.21e-06, + "input_cost_per_token_batches": 6.05e-07, + "litellm_provider": "azure", + "max_input_tokens": 200000, + "max_output_tokens": 100000, + "max_tokens": 100000, + "mode": "chat", + "output_cost_per_token": 4.84e-06, + "output_cost_per_token_batches": 2.42e-06, + "supports_prompt_caching": true, + 
"supports_reasoning": true, + "supports_tool_choice": true, + "supports_vision": false + }, + "azure/global-standard/gpt-4o-2024-08-06": { + "cache_read_input_token_cost": 1.25e-06, + "deprecation_date": "2026-02-27", + "input_cost_per_token": 2.5e-06, + "litellm_provider": "azure", + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 1e-05, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/global-standard/gpt-4o-2024-11-20": { + "cache_read_input_token_cost": 1.25e-06, + "deprecation_date": "2026-03-01", + "input_cost_per_token": 2.5e-06, + "litellm_provider": "azure", + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 1e-05, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/global-standard/gpt-4o-mini": { + "input_cost_per_token": 1.5e-07, + "litellm_provider": "azure", + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 6e-07, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/global/gpt-4o-2024-08-06": { + "deprecation_date": "2026-02-27", + "cache_read_input_token_cost": 1.25e-06, + "input_cost_per_token": 2.5e-06, + "litellm_provider": "azure", + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 1e-05, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": 
true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/global/gpt-4o-2024-11-20": { + "deprecation_date": "2026-03-01", + "cache_read_input_token_cost": 1.25e-06, + "input_cost_per_token": 2.5e-06, + "litellm_provider": "azure", + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 1e-05, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/global/gpt-5.1": { + "cache_read_input_token_cost": 1.25e-07, + "input_cost_per_token": 1.25e-06, + "litellm_provider": "azure", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 1e-05, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text", + "image" + ], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_none_reasoning_effort": true + }, + "azure/global/gpt-5.1-chat": { + "cache_read_input_token_cost": 1.25e-07, + "input_cost_per_token": 1.25e-06, + "litellm_provider": "azure", + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 1e-05, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text", + "image" + ], + 
"supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_none_reasoning_effort": true + }, + "azure/global/gpt-5.1-codex": { + "cache_read_input_token_cost": 1.25e-07, + "input_cost_per_token": 1.25e-06, + "litellm_provider": "azure", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "responses", + "output_cost_per_token": 1e-05, + "supported_endpoints": [ + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": false, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/global/gpt-5.1-codex-mini": { + "cache_read_input_token_cost": 2.5e-08, + "input_cost_per_token": 2.5e-07, + "litellm_provider": "azure", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "responses", + "output_cost_per_token": 2e-06, + "supported_endpoints": [ + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": false, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/gpt-3.5-turbo": { + 
"input_cost_per_token": 5e-07, + "litellm_provider": "azure", + "max_input_tokens": 4097, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 1.5e-06, + "supports_function_calling": true, + "supports_tool_choice": true + }, + "azure/gpt-3.5-turbo-0125": { + "deprecation_date": "2025-03-31", + "input_cost_per_token": 5e-07, + "litellm_provider": "azure", + "max_input_tokens": 16384, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 1.5e-06, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true + }, + "azure/gpt-3.5-turbo-instruct-0914": { + "input_cost_per_token": 1.5e-06, + "litellm_provider": "azure_text", + "max_input_tokens": 4097, + "max_tokens": 4097, + "mode": "completion", + "output_cost_per_token": 2e-06 + }, + "azure/gpt-35-turbo": { + "input_cost_per_token": 5e-07, + "litellm_provider": "azure", + "max_input_tokens": 4097, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 1.5e-06, + "supports_function_calling": true, + "supports_tool_choice": true + }, + "azure/gpt-35-turbo-0125": { + "deprecation_date": "2025-05-31", + "input_cost_per_token": 5e-07, + "litellm_provider": "azure", + "max_input_tokens": 16384, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 1.5e-06, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true + }, + "azure/gpt-35-turbo-1106": { + "deprecation_date": "2025-03-31", + "input_cost_per_token": 1e-06, + "litellm_provider": "azure", + "max_input_tokens": 16384, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 2e-06, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true + }, + "azure/gpt-35-turbo-16k": { + "input_cost_per_token": 
3e-06, + "litellm_provider": "azure", + "max_input_tokens": 16385, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 4e-06, + "supports_tool_choice": true + }, + "azure/gpt-35-turbo-16k-0613": { + "input_cost_per_token": 3e-06, + "litellm_provider": "azure", + "max_input_tokens": 16385, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 4e-06, + "supports_function_calling": true, + "supports_tool_choice": true + }, + "azure/gpt-35-turbo-instruct": { + "input_cost_per_token": 1.5e-06, + "litellm_provider": "azure_text", + "max_input_tokens": 4097, + "max_tokens": 4097, + "mode": "completion", + "output_cost_per_token": 2e-06 + }, + "azure/gpt-35-turbo-instruct-0914": { + "input_cost_per_token": 1.5e-06, + "litellm_provider": "azure_text", + "max_input_tokens": 4097, + "max_tokens": 4097, + "mode": "completion", + "output_cost_per_token": 2e-06 + }, + "azure/gpt-4": { + "input_cost_per_token": 3e-05, + "litellm_provider": "azure", + "max_input_tokens": 8192, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 6e-05, + "supports_function_calling": true, + "supports_tool_choice": true + }, + "azure/gpt-4-0125-preview": { + "input_cost_per_token": 1e-05, + "litellm_provider": "azure", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 3e-05, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true + }, + "azure/gpt-4-0613": { + "input_cost_per_token": 3e-05, + "litellm_provider": "azure", + "max_input_tokens": 8192, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 6e-05, + "supports_function_calling": true, + "supports_tool_choice": true + }, + "azure/gpt-4-1106-preview": { + "input_cost_per_token": 1e-05, + "litellm_provider": "azure", + "max_input_tokens": 
128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 3e-05, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true + }, + "azure/gpt-4-32k": { + "input_cost_per_token": 6e-05, + "litellm_provider": "azure", + "max_input_tokens": 32768, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 0.00012, + "supports_tool_choice": true + }, + "azure/gpt-4-32k-0613": { + "input_cost_per_token": 6e-05, + "litellm_provider": "azure", + "max_input_tokens": 32768, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 0.00012, + "supports_tool_choice": true + }, + "azure/gpt-4-turbo": { + "input_cost_per_token": 1e-05, + "litellm_provider": "azure", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 3e-05, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true + }, + "azure/gpt-4-turbo-2024-04-09": { + "input_cost_per_token": 1e-05, + "litellm_provider": "azure", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 3e-05, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/gpt-4-turbo-vision-preview": { + "input_cost_per_token": 1e-05, + "litellm_provider": "azure", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 3e-05, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/gpt-4.1": { + "cache_read_input_token_cost": 5e-07, + "input_cost_per_token": 2e-06, + "input_cost_per_token_batches": 1e-06, + "litellm_provider": "azure", + "max_input_tokens": 1047576, + "max_output_tokens": 32768, + 
"max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 8e-06, + "output_cost_per_token_batches": 4e-06, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_web_search": false + }, + "azure/gpt-4.1-2025-04-14": { + "deprecation_date": "2026-11-04", + "cache_read_input_token_cost": 5e-07, + "input_cost_per_token": 2e-06, + "input_cost_per_token_batches": 1e-06, + "litellm_provider": "azure", + "max_input_tokens": 1047576, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 8e-06, + "output_cost_per_token_batches": 4e-06, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_web_search": false + }, + "azure/gpt-4.1-mini": { + "cache_read_input_token_cost": 1e-07, + "input_cost_per_token": 4e-07, + "input_cost_per_token_batches": 2e-07, + "litellm_provider": "azure", + "max_input_tokens": 1047576, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 1.6e-06, + "output_cost_per_token_batches": 8e-07, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + 
"supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_web_search": false + }, + "azure/gpt-4.1-mini-2025-04-14": { + "deprecation_date": "2026-11-04", + "cache_read_input_token_cost": 1e-07, + "input_cost_per_token": 4e-07, + "input_cost_per_token_batches": 2e-07, + "litellm_provider": "azure", + "max_input_tokens": 1047576, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 1.6e-06, + "output_cost_per_token_batches": 8e-07, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_web_search": false + }, + "azure/gpt-4.1-nano": { + "cache_read_input_token_cost": 2.5e-08, + "input_cost_per_token": 1e-07, + "input_cost_per_token_batches": 5e-08, + "litellm_provider": "azure", + "max_input_tokens": 1047576, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 4e-07, + "output_cost_per_token_batches": 2e-07, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, 
+ "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/gpt-4.1-nano-2025-04-14": { + "deprecation_date": "2026-11-04", + "cache_read_input_token_cost": 2.5e-08, + "input_cost_per_token": 1e-07, + "input_cost_per_token_batches": 5e-08, + "litellm_provider": "azure", + "max_input_tokens": 1047576, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 4e-07, + "output_cost_per_token_batches": 2e-07, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/gpt-4.5-preview": { + "cache_read_input_token_cost": 3.75e-05, + "input_cost_per_token": 7.5e-05, + "input_cost_per_token_batches": 3.75e-05, + "litellm_provider": "azure", + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 0.00015, + "output_cost_per_token_batches": 7.5e-05, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/gpt-4o": { + "cache_read_input_token_cost": 1.25e-06, + "input_cost_per_token": 2.5e-06, + "litellm_provider": "azure", + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 1e-05, + "supports_function_calling": true, + 
"supports_parallel_function_calling": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/gpt-4o-2024-05-13": { + "input_cost_per_token": 5e-06, + "litellm_provider": "azure", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 1.5e-05, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/gpt-4o-2024-08-06": { + "deprecation_date": "2026-02-27", + "cache_read_input_token_cost": 1.25e-06, + "input_cost_per_token": 2.5e-06, + "litellm_provider": "azure", + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 1e-05, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/gpt-4o-2024-11-20": { + "deprecation_date": "2026-03-01", + "cache_read_input_token_cost": 1.25e-06, + "input_cost_per_token": 2.75e-06, + "litellm_provider": "azure", + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 1.1e-05, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/gpt-audio-2025-08-28": { + "input_cost_per_audio_token": 4e-05, + "input_cost_per_token": 2.5e-06, + "litellm_provider": "azure", + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_audio_token": 8e-05, + "output_cost_per_token": 1e-05, + "supported_endpoints": [ + 
"/v1/chat/completions" + ], + "supported_modalities": [ + "text", + "audio" + ], + "supported_output_modalities": [ + "text", + "audio" + ], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": false, + "supports_reasoning": false, + "supports_response_schema": false, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": false + }, + "azure/gpt-audio-1.5-2026-02-23": { + "input_cost_per_audio_token": 4e-05, + "input_cost_per_token": 2.5e-06, + "litellm_provider": "azure", + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_audio_token": 8e-05, + "output_cost_per_token": 1e-05, + "supported_endpoints": [ + "/v1/chat/completions" + ], + "supported_modalities": [ + "text", + "audio" + ], + "supported_output_modalities": [ + "text", + "audio" + ], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": false, + "supports_reasoning": false, + "supports_response_schema": false, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": false + }, + "azure/gpt-audio-mini-2025-10-06": { + "input_cost_per_audio_token": 1e-05, + "input_cost_per_token": 6e-07, + "litellm_provider": "azure", + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_audio_token": 2e-05, + "output_cost_per_token": 2.4e-06, + "supported_endpoints": [ + "/v1/chat/completions" + ], + "supported_modalities": [ + "text", + "audio" + ], + "supported_output_modalities": [ + "text", + "audio" + ], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": false, + "supports_reasoning": false, + "supports_response_schema": false, + 
"supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": false + }, + "azure/gpt-4o-audio-preview-2024-12-17": { + "input_cost_per_audio_token": 4e-05, + "input_cost_per_token": 2.5e-06, + "litellm_provider": "azure", + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_audio_token": 8e-05, + "output_cost_per_token": 1e-05, + "supported_endpoints": [ + "/v1/chat/completions" + ], + "supported_modalities": [ + "text", + "audio" + ], + "supported_output_modalities": [ + "text", + "audio" + ], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": false, + "supports_reasoning": false, + "supports_response_schema": false, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": false + }, + "azure/gpt-4o-mini": { + "cache_read_input_token_cost": 7.5e-08, + "input_cost_per_token": 1.65e-07, + "litellm_provider": "azure", + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 6.6e-07, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/gpt-4o-mini-2024-07-18": { + "cache_read_input_token_cost": 7.5e-08, + "input_cost_per_token": 1.65e-07, + "litellm_provider": "azure", + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 6.6e-07, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/gpt-4o-mini-audio-preview-2024-12-17": { + "input_cost_per_audio_token": 4e-05, + 
"input_cost_per_token": 2.5e-06, + "litellm_provider": "azure", + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_audio_token": 8e-05, + "output_cost_per_token": 1e-05, + "supported_endpoints": [ + "/v1/chat/completions" + ], + "supported_modalities": [ + "text", + "audio" + ], + "supported_output_modalities": [ + "text", + "audio" + ], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": false, + "supports_reasoning": false, + "supports_response_schema": false, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": false + }, + "azure/gpt-4o-mini-realtime-preview-2024-12-17": { + "cache_creation_input_audio_token_cost": 3e-07, + "cache_read_input_token_cost": 3e-07, + "input_cost_per_audio_token": 1e-05, + "input_cost_per_token": 6e-07, + "litellm_provider": "azure", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_audio_token": 2e-05, + "output_cost_per_token": 2.4e-06, + "supports_audio_input": true, + "supports_audio_output": true, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "azure/gpt-realtime-2025-08-28": { + "cache_creation_input_audio_token_cost": 4e-06, + "cache_read_input_token_cost": 4e-06, + "input_cost_per_audio_token": 3.2e-05, + "input_cost_per_image": 5e-06, + "input_cost_per_token": 4e-06, + "litellm_provider": "azure", + "max_input_tokens": 32000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_audio_token": 6.4e-05, + "output_cost_per_token": 1.6e-05, + "supported_endpoints": [ + "/v1/realtime" + ], + "supported_modalities": [ + "text", + "image", + "audio" + ], + "supported_output_modalities": [ + "text", + "audio" + ], + 
"supports_audio_input": true, + "supports_audio_output": true, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "azure/gpt-realtime-1.5-2026-02-23": { + "cache_creation_input_audio_token_cost": 4e-06, + "cache_read_input_token_cost": 4e-06, + "input_cost_per_audio_token": 3.2e-05, + "input_cost_per_image": 5e-06, + "input_cost_per_token": 4e-06, + "litellm_provider": "azure", + "max_input_tokens": 32000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_audio_token": 6.4e-05, + "output_cost_per_token": 1.6e-05, + "supported_endpoints": [ + "/v1/realtime" + ], + "supported_modalities": [ + "text", + "image", + "audio" + ], + "supported_output_modalities": [ + "text", + "audio" + ], + "supports_audio_input": true, + "supports_audio_output": true, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "azure/gpt-realtime-mini-2025-10-06": { + "cache_creation_input_audio_token_cost": 3e-07, + "cache_read_input_token_cost": 6e-08, + "input_cost_per_audio_token": 1e-05, + "input_cost_per_image": 8e-07, + "input_cost_per_token": 6e-07, + "litellm_provider": "azure", + "max_input_tokens": 32000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_audio_token": 2e-05, + "output_cost_per_token": 2.4e-06, + "supported_endpoints": [ + "/v1/realtime" + ], + "supported_modalities": [ + "text", + "image", + "audio" + ], + "supported_output_modalities": [ + "text", + "audio" + ], + "supports_audio_input": true, + "supports_audio_output": true, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "azure/gpt-4o-mini-transcribe": { + "input_cost_per_audio_token": 3e-06, + "input_cost_per_token": 
1.25e-06, + "litellm_provider": "azure", + "max_input_tokens": 16000, + "max_output_tokens": 2000, + "mode": "audio_transcription", + "output_cost_per_token": 5e-06, + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] + }, + "azure/gpt-4o-mini-tts": { + "input_cost_per_token": 2.5e-06, + "litellm_provider": "azure", + "mode": "audio_speech", + "output_cost_per_audio_token": 1.2e-05, + "output_cost_per_second": 0.00025, + "output_cost_per_token": 1e-05, + "supported_endpoints": [ + "/v1/audio/speech" + ], + "supported_modalities": [ + "text", + "audio" + ], + "supported_output_modalities": [ + "audio" + ] + }, + "azure/gpt-4o-realtime-preview-2024-10-01": { + "cache_creation_input_audio_token_cost": 2e-05, + "cache_read_input_token_cost": 2.5e-06, + "input_cost_per_audio_token": 0.0001, + "input_cost_per_token": 5e-06, + "litellm_provider": "azure", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_audio_token": 0.0002, + "output_cost_per_token": 2e-05, + "supports_audio_input": true, + "supports_audio_output": true, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "azure/gpt-4o-realtime-preview-2024-12-17": { + "cache_read_input_token_cost": 2.5e-06, + "input_cost_per_audio_token": 4e-05, + "input_cost_per_token": 5e-06, + "litellm_provider": "azure", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_audio_token": 8e-05, + "output_cost_per_token": 2e-05, + "supported_modalities": [ + "text", + "audio" + ], + "supported_output_modalities": [ + "text", + "audio" + ], + "supports_audio_input": true, + "supports_audio_output": true, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "azure/gpt-4o-transcribe": { + 
"input_cost_per_audio_token": 6e-06, + "input_cost_per_token": 2.5e-06, + "litellm_provider": "azure", + "max_input_tokens": 16000, + "max_output_tokens": 2000, + "mode": "audio_transcription", + "output_cost_per_token": 1e-05, + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] + }, + "azure/gpt-4o-transcribe-diarize": { + "input_cost_per_audio_token": 6e-06, + "input_cost_per_token": 2.5e-06, + "litellm_provider": "azure", + "max_input_tokens": 16000, + "max_output_tokens": 2000, + "mode": "audio_transcription", + "output_cost_per_token": 1e-05, + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] + }, + "azure/gpt-5.1-2025-11-13": { + "cache_read_input_token_cost": 1.25e-07, + "cache_read_input_token_cost_priority": 2.5e-07, + "input_cost_per_token": 1.25e-06, + "input_cost_per_token_priority": 2.5e-06, + "litellm_provider": "azure", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 1e-05, + "output_cost_per_token_priority": 2e-05, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text", + "image" + ], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_service_tier": true, + "supports_vision": true, + "supports_none_reasoning_effort": true, + "supports_minimal_reasoning_effort": true + }, + "azure/gpt-5.1-chat-2025-11-13": { + "cache_read_input_token_cost": 1.25e-07, + "cache_read_input_token_cost_priority": 2.5e-07, + "input_cost_per_token": 1.25e-06, + "input_cost_per_token_priority": 2.5e-06, + "litellm_provider": "azure", + "max_input_tokens": 128000, + "max_output_tokens": 
16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 1e-05, + "output_cost_per_token_priority": 2e-05, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text", + "image" + ], + "supports_function_calling": false, + "supports_native_streaming": true, + "supports_parallel_function_calling": false, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": false, + "supports_vision": true, + "supports_none_reasoning_effort": true + }, + "azure/gpt-5.1-codex-2025-11-13": { + "cache_read_input_token_cost": 1.25e-07, + "cache_read_input_token_cost_priority": 2.5e-07, + "input_cost_per_token": 1.25e-06, + "input_cost_per_token_priority": 2.5e-06, + "litellm_provider": "azure", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "responses", + "output_cost_per_token": 1e-05, + "output_cost_per_token_priority": 2e-05, + "supported_endpoints": [ + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": false, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/gpt-5.1-codex-mini-2025-11-13": { + "cache_read_input_token_cost": 2.5e-08, + "cache_read_input_token_cost_priority": 4.5e-08, + "input_cost_per_token": 2.5e-07, + "input_cost_per_token_priority": 4.5e-07, + "litellm_provider": "azure", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "responses", + 
"output_cost_per_token": 2e-06, + "output_cost_per_token_priority": 3.6e-06, + "supported_endpoints": [ + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": false, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/gpt-5": { + "cache_read_input_token_cost": 1.25e-07, + "input_cost_per_token": 1.25e-06, + "litellm_provider": "azure", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 1e-05, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/gpt-5-2025-08-07": { + "cache_read_input_token_cost": 1.25e-07, + "input_cost_per_token": 1.25e-06, + "litellm_provider": "azure", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 1e-05, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + 
"supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/gpt-5-chat": { + "cache_read_input_token_cost": 1.25e-07, + "input_cost_per_token": 1.25e-06, + "litellm_provider": "azure", + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 1e-05, + "source": "https://azure.microsoft.com/en-us/blog/gpt-5-in-azure-ai-foundry-the-future-of-ai-apps-and-agents-starts-here/", + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/gpt-5-chat-latest": { + "cache_read_input_token_cost": 1.25e-07, + "input_cost_per_token": 1.25e-06, + "litellm_provider": "azure", + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 1e-05, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + 
"azure/gpt-5-codex": { + "cache_read_input_token_cost": 1.25e-07, + "input_cost_per_token": 1.25e-06, + "litellm_provider": "azure", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "responses", + "output_cost_per_token": 1e-05, + "supported_endpoints": [ + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/gpt-5-mini": { + "cache_read_input_token_cost": 2.5e-08, + "input_cost_per_token": 2.5e-07, + "litellm_provider": "azure", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 2e-06, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/gpt-5-mini-2025-08-07": { + "cache_read_input_token_cost": 2.5e-08, + "input_cost_per_token": 2.5e-07, + "litellm_provider": "azure", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 2e-06, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + 
"image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/gpt-5-nano": { + "cache_read_input_token_cost": 5e-09, + "input_cost_per_token": 5e-08, + "litellm_provider": "azure", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 4e-07, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/gpt-5-nano-2025-08-07": { + "cache_read_input_token_cost": 5e-09, + "input_cost_per_token": 5e-08, + "litellm_provider": "azure", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 4e-07, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + 
"supports_tool_choice": true, + "supports_vision": true + }, + "azure/gpt-5-pro": { + "input_cost_per_token": 1.5e-05, + "litellm_provider": "azure", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "responses", + "output_cost_per_token": 0.00012, + "source": "https://learn.microsoft.com/en-us/azure/ai-foundry/foundry-models/concepts/models-sold-directly-by-azure?pivots=azure-openai&tabs=global-standard-aoai%2Cstandard-chat-completions%2Cglobal-standard#gpt-5", + "supported_endpoints": [ + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/gpt-5.1": { + "cache_read_input_token_cost": 1.25e-07, + "input_cost_per_token": 1.25e-06, + "litellm_provider": "azure", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 1e-05, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text", + "image" + ], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_none_reasoning_effort": true + }, + "azure/gpt-5.1-chat": { + "cache_read_input_token_cost": 1.25e-07, + "input_cost_per_token": 1.25e-06, + "litellm_provider": "azure", + "max_input_tokens": 
128000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 1e-05, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text", + "image" + ], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_none_reasoning_effort": true + }, + "azure/gpt-5.1-codex": { + "cache_read_input_token_cost": 1.25e-07, + "input_cost_per_token": 1.25e-06, + "litellm_provider": "azure", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "responses", + "output_cost_per_token": 1e-05, + "supported_endpoints": [ + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": false, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/gpt-5.1-codex-max": { + "cache_read_input_token_cost": 1.25e-07, + "input_cost_per_token": 1.25e-06, + "litellm_provider": "azure", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "responses", + "output_cost_per_token": 1e-05, + "supported_endpoints": [ + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_function_calling": true, + "supports_native_streaming": 
true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": false, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/gpt-5.1-codex-mini": { + "cache_read_input_token_cost": 2.5e-08, + "input_cost_per_token": 2.5e-07, + "litellm_provider": "azure", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "responses", + "output_cost_per_token": 2e-06, + "supported_endpoints": [ + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": false, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/gpt-5.2": { + "cache_read_input_token_cost": 1.75e-07, + "input_cost_per_token": 1.75e-06, + "litellm_provider": "azure", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 1.4e-05, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text", + "image" + ], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/gpt-5.2-2025-12-11": { + "cache_read_input_token_cost": 1.75e-07, + 
"cache_read_input_token_cost_priority": 3.5e-07, + "input_cost_per_token": 1.75e-06, + "input_cost_per_token_priority": 3.5e-06, + "litellm_provider": "azure", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 1.4e-05, + "output_cost_per_token_priority": 2.8e-05, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text", + "image" + ], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_service_tier": true, + "supports_vision": true + }, + "azure/gpt-5.2-chat": { + "cache_read_input_token_cost": 1.75e-07, + "cache_read_input_token_cost_priority": 3.5e-07, + "input_cost_per_token": 1.75e-06, + "input_cost_per_token_priority": 3.5e-06, + "litellm_provider": "azure", + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 1.4e-05, + "output_cost_per_token_priority": 2.8e-05, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/gpt-5.2-chat-2025-12-11": { + "cache_read_input_token_cost": 1.75e-07, + "cache_read_input_token_cost_priority": 
3.5e-07, + "input_cost_per_token": 1.75e-06, + "input_cost_per_token_priority": 3.5e-06, + "litellm_provider": "azure", + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 1.4e-05, + "output_cost_per_token_priority": 2.8e-05, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/gpt-5.2-codex": { + "cache_read_input_token_cost": 1.75e-07, + "input_cost_per_token": 1.75e-06, + "litellm_provider": "azure", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "responses", + "output_cost_per_token": 1.4e-05, + "supported_endpoints": [ + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/gpt-5.3-chat": { + "cache_read_input_token_cost": 1.75e-07, + "cache_read_input_token_cost_priority": 3.5e-07, + "input_cost_per_token": 1.75e-06, + "input_cost_per_token_priority": 3.5e-06, + "litellm_provider": "azure", + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 1.4e-05, + 
"output_cost_per_token_priority": 2.8e-05, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_service_tier": true, + "supports_vision": true + }, + "azure/gpt-5.3-codex": { + "cache_read_input_token_cost": 1.75e-07, + "input_cost_per_token": 1.75e-06, + "litellm_provider": "azure", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "responses", + "output_cost_per_token": 1.4e-05, + "supported_endpoints": [ + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/gpt-5.2-pro": { + "input_cost_per_token": 2.1e-05, + "litellm_provider": "azure", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "responses", + "output_cost_per_token": 0.000168, + "supported_endpoints": [ + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": 
true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_web_search": true + }, + "azure/gpt-5.2-pro-2025-12-11": { + "input_cost_per_token": 2.1e-05, + "litellm_provider": "azure", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "responses", + "output_cost_per_token": 0.000168, + "supported_endpoints": [ + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_web_search": true + }, + "azure/gpt-5.4": { + "cache_read_input_token_cost": 2.5e-07, + "cache_read_input_token_cost_above_272k_tokens": 5e-07, + "cache_read_input_token_cost_priority": 5e-07, + "cache_read_input_token_cost_above_272k_tokens_priority": 1e-06, + "input_cost_per_token": 2.5e-06, + "input_cost_per_token_above_272k_tokens": 5e-06, + "input_cost_per_token_priority": 5e-06, + "input_cost_per_token_above_272k_tokens_priority": 1e-05, + "litellm_provider": "azure", + "max_input_tokens": 1050000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 1.5e-05, + "output_cost_per_token_above_272k_tokens": 2.25e-05, + "output_cost_per_token_priority": 3e-05, + "output_cost_per_token_above_272k_tokens_priority": 4.5e-05, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_function_calling": true, + "supports_native_streaming": true, + 
"supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_service_tier": true, + "supports_vision": true + }, + "azure/gpt-5.4-2026-03-05": { + "cache_read_input_token_cost": 2.5e-07, + "cache_read_input_token_cost_above_272k_tokens": 5e-07, + "cache_read_input_token_cost_priority": 5e-07, + "cache_read_input_token_cost_above_272k_tokens_priority": 1e-06, + "input_cost_per_token": 2.5e-06, + "input_cost_per_token_above_272k_tokens": 5e-06, + "input_cost_per_token_priority": 5e-06, + "input_cost_per_token_above_272k_tokens_priority": 1e-05, + "litellm_provider": "azure", + "max_input_tokens": 1050000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 1.5e-05, + "output_cost_per_token_above_272k_tokens": 2.25e-05, + "output_cost_per_token_priority": 3e-05, + "output_cost_per_token_above_272k_tokens_priority": 4.5e-05, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_service_tier": true, + "supports_vision": true + }, + "azure/gpt-5.4-pro": { + "cache_read_input_token_cost": 3e-06, + "cache_read_input_token_cost_above_272k_tokens": 6e-06, + "input_cost_per_token": 3e-05, + "input_cost_per_token_above_272k_tokens": 6e-05, + "litellm_provider": "azure", + "max_input_tokens": 1050000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "responses", + 
"output_cost_per_token": 0.00018, + "output_cost_per_token_above_272k_tokens": 0.00027, + "supported_endpoints": [ + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": false, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_web_search": true + }, + "azure/gpt-5.4-pro-2026-03-05": { + "cache_read_input_token_cost": 3e-06, + "cache_read_input_token_cost_above_272k_tokens": 6e-06, + "input_cost_per_token": 3e-05, + "input_cost_per_token_above_272k_tokens": 6e-05, + "litellm_provider": "azure", + "max_input_tokens": 1050000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "responses", + "output_cost_per_token": 0.00018, + "output_cost_per_token_above_272k_tokens": 0.00027, + "supported_endpoints": [ + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": false, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_web_search": true + }, + "azure/gpt-5.5": { + "cache_read_input_token_cost": 5e-07, + "cache_read_input_token_cost_above_272k_tokens": 1e-06, + "cache_read_input_token_cost_priority": 1e-06, + "cache_read_input_token_cost_above_272k_tokens_priority": 2e-06, + "input_cost_per_token": 5e-06, + "input_cost_per_token_above_272k_tokens": 1e-05, + "input_cost_per_token_priority": 1e-05, + 
"input_cost_per_token_above_272k_tokens_priority": 2e-05, + "litellm_provider": "azure", + "max_input_tokens": 1050000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 3e-05, + "output_cost_per_token_above_272k_tokens": 4.5e-05, + "output_cost_per_token_priority": 6e-05, + "output_cost_per_token_above_272k_tokens_priority": 9e-05, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_service_tier": true, + "supports_vision": true, + "supports_web_search": true, + "supports_none_reasoning_effort": true, + "supports_xhigh_reasoning_effort": true, + "supports_minimal_reasoning_effort": false + }, + "azure/gpt-5.5-2026-04-23": { + "cache_read_input_token_cost": 5e-07, + "cache_read_input_token_cost_above_272k_tokens": 1e-06, + "cache_read_input_token_cost_priority": 1e-06, + "cache_read_input_token_cost_above_272k_tokens_priority": 2e-06, + "input_cost_per_token": 5e-06, + "input_cost_per_token_above_272k_tokens": 1e-05, + "input_cost_per_token_priority": 1e-05, + "input_cost_per_token_above_272k_tokens_priority": 2e-05, + "litellm_provider": "azure", + "max_input_tokens": 1050000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 3e-05, + "output_cost_per_token_above_272k_tokens": 4.5e-05, + "output_cost_per_token_priority": 6e-05, + "output_cost_per_token_above_272k_tokens_priority": 9e-05, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": 
[ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_service_tier": true, + "supports_vision": true, + "supports_web_search": true + }, + "azure/gpt-5.5-pro": { + "cache_read_input_token_cost": 3e-06, + "cache_read_input_token_cost_above_272k_tokens": 6e-06, + "input_cost_per_token": 3e-05, + "input_cost_per_token_above_272k_tokens": 6e-05, + "litellm_provider": "azure", + "max_input_tokens": 1050000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "responses", + "output_cost_per_token": 0.00018, + "output_cost_per_token_above_272k_tokens": 0.00027, + "supported_endpoints": [ + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": false, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_web_search": true, + "supports_none_reasoning_effort": false, + "supports_xhigh_reasoning_effort": true, + "supports_minimal_reasoning_effort": false, + "supports_low_reasoning_effort": false + }, + "azure/gpt-5.5-pro-2026-04-23": { + "cache_read_input_token_cost": 3e-06, + "cache_read_input_token_cost_above_272k_tokens": 6e-06, + "input_cost_per_token": 3e-05, + "input_cost_per_token_above_272k_tokens": 6e-05, + "litellm_provider": "azure", + "max_input_tokens": 1050000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": 
"responses", + "output_cost_per_token": 0.00018, + "output_cost_per_token_above_272k_tokens": 0.00027, + "supported_endpoints": [ + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": false, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_web_search": true + }, + "azure/gpt-5.4-mini": { + "cache_read_input_token_cost": 7.5e-08, + "input_cost_per_token": 7.5e-07, + "litellm_provider": "azure", + "max_input_tokens": 1050000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 4.5e-06, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_service_tier": true, + "supports_vision": true, + "supports_web_search": true, + "supports_none_reasoning_effort": false, + "supports_xhigh_reasoning_effort": false + }, + "azure/gpt-5.4-mini-2026-03-17": { + "cache_read_input_token_cost": 7.5e-08, + "input_cost_per_token": 7.5e-07, + "litellm_provider": "azure", + "max_input_tokens": 1050000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 4.5e-06, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + 
"supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_service_tier": true, + "supports_vision": true, + "supports_web_search": true, + "supports_none_reasoning_effort": false, + "supports_xhigh_reasoning_effort": false + }, + "azure/gpt-5.4-nano": { + "cache_read_input_token_cost": 2e-08, + "input_cost_per_token": 2e-07, + "litellm_provider": "azure", + "max_input_tokens": 1050000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 1.25e-06, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_service_tier": true, + "supports_vision": true, + "supports_web_search": true, + "supports_none_reasoning_effort": false, + "supports_xhigh_reasoning_effort": false + }, + "azure/gpt-5.4-nano-2026-03-17": { + "cache_read_input_token_cost": 2e-08, + "input_cost_per_token": 2e-07, + "litellm_provider": "azure", + "max_input_tokens": 1050000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 1.25e-06, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + 
"supported_output_modalities": [ + "text" + ], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_service_tier": true, + "supports_vision": true, + "supports_web_search": true, + "supports_none_reasoning_effort": false, + "supports_xhigh_reasoning_effort": false + }, + "azure/gpt-image-1": { + "cache_read_input_image_token_cost": 2.5e-06, + "cache_read_input_token_cost": 1.25e-06, + "input_cost_per_image_token": 1e-05, + "input_cost_per_token": 5e-06, + "litellm_provider": "azure", + "mode": "image_generation", + "output_cost_per_image_token": 4e-05, + "supported_endpoints": [ + "/v1/images/generations", + "/v1/images/edits" + ] + }, + "azure/hd/1024-x-1024/dall-e-3": { + "input_cost_per_pixel": 7.629e-08, + "litellm_provider": "azure", + "mode": "image_generation", + "output_cost_per_token": 0.0 + }, + "azure/hd/1024-x-1792/dall-e-3": { + "input_cost_per_pixel": 6.539e-08, + "litellm_provider": "azure", + "mode": "image_generation", + "output_cost_per_token": 0.0 + }, + "azure/hd/1792-x-1024/dall-e-3": { + "input_cost_per_pixel": 6.539e-08, + "litellm_provider": "azure", + "mode": "image_generation", + "output_cost_per_token": 0.0 + }, + "azure/high/1024-x-1024/gpt-image-1": { + "input_cost_per_pixel": 1.59263611e-07, + "litellm_provider": "azure", + "mode": "image_generation", + "output_cost_per_pixel": 0.0, + "supported_endpoints": [ + "/v1/images/generations" + ] + }, + "azure/high/1024-x-1536/gpt-image-1": { + "input_cost_per_pixel": 1.58945719e-07, + "litellm_provider": "azure", + "mode": "image_generation", + "output_cost_per_pixel": 0.0, + "supported_endpoints": [ + "/v1/images/generations" + ] + }, + "azure/high/1536-x-1024/gpt-image-1": { + "input_cost_per_pixel": 1.58945719e-07, 
+ "litellm_provider": "azure", + "mode": "image_generation", + "output_cost_per_pixel": 0.0, + "supported_endpoints": [ + "/v1/images/generations" + ] + }, + "azure/low/1024-x-1024/gpt-image-1": { + "input_cost_per_pixel": 1.0490417e-08, + "litellm_provider": "azure", + "mode": "image_generation", + "output_cost_per_pixel": 0.0, + "supported_endpoints": [ + "/v1/images/generations" + ] + }, + "azure/low/1024-x-1536/gpt-image-1": { + "input_cost_per_pixel": 1.0172526e-08, + "litellm_provider": "azure", + "mode": "image_generation", + "output_cost_per_pixel": 0.0, + "supported_endpoints": [ + "/v1/images/generations" + ] + }, + "azure/low/1536-x-1024/gpt-image-1": { + "input_cost_per_pixel": 1.0172526e-08, + "litellm_provider": "azure", + "mode": "image_generation", + "output_cost_per_pixel": 0.0, + "supported_endpoints": [ + "/v1/images/generations" + ] + }, + "azure/medium/1024-x-1024/gpt-image-1": { + "input_cost_per_pixel": 4.0054321e-08, + "litellm_provider": "azure", + "mode": "image_generation", + "output_cost_per_pixel": 0.0, + "supported_endpoints": [ + "/v1/images/generations" + ] + }, + "azure/medium/1024-x-1536/gpt-image-1": { + "input_cost_per_pixel": 4.0054321e-08, + "litellm_provider": "azure", + "mode": "image_generation", + "output_cost_per_pixel": 0.0, + "supported_endpoints": [ + "/v1/images/generations" + ] + }, + "azure/medium/1536-x-1024/gpt-image-1": { + "input_cost_per_pixel": 4.0054321e-08, + "litellm_provider": "azure", + "mode": "image_generation", + "output_cost_per_pixel": 0.0, + "supported_endpoints": [ + "/v1/images/generations" + ] + }, + "azure/gpt-image-1-mini": { + "cache_read_input_image_token_cost": 2.5e-07, + "cache_read_input_token_cost": 2e-07, + "input_cost_per_image_token": 2.5e-06, + "input_cost_per_token": 2e-06, + "litellm_provider": "azure", + "mode": "image_generation", + "output_cost_per_image_token": 8e-06, + "supported_endpoints": [ + "/v1/images/generations", + "/v1/images/edits" + ] + }, + "azure/gpt-image-1.5": { + 
"cache_read_input_image_token_cost": 2e-06, + "cache_read_input_token_cost": 1.25e-06, + "input_cost_per_token": 5e-06, + "input_cost_per_image_token": 8e-06, + "litellm_provider": "azure", + "mode": "image_generation", + "output_cost_per_image_token": 3.2e-05, + "supported_endpoints": [ + "/v1/images/generations", + "/v1/images/edits" + ] + }, + "azure/gpt-image-1.5-2025-12-16": { + "cache_read_input_image_token_cost": 2e-06, + "cache_read_input_token_cost": 1.25e-06, + "input_cost_per_token": 5e-06, + "input_cost_per_image_token": 8e-06, + "litellm_provider": "azure", + "mode": "image_generation", + "output_cost_per_image_token": 3.2e-05, + "supported_endpoints": [ + "/v1/images/generations", + "/v1/images/edits" + ] + }, + "azure/gpt-image-2": { + "cache_read_input_image_token_cost": 2e-06, + "cache_read_input_token_cost": 1.25e-06, + "input_cost_per_token": 5e-06, + "input_cost_per_image_token": 8e-06, + "litellm_provider": "azure", + "mode": "image_generation", + "output_cost_per_token": 1e-05, + "output_cost_per_image_token": 3e-05, + "supported_endpoints": [ + "/v1/images/generations", + "/v1/images/edits" + ], + "supports_vision": true, + "supports_pdf_input": true + }, + "azure/gpt-image-2-2026-04-21": { + "cache_read_input_image_token_cost": 2e-06, + "cache_read_input_token_cost": 1.25e-06, + "input_cost_per_token": 5e-06, + "input_cost_per_image_token": 8e-06, + "litellm_provider": "azure", + "mode": "image_generation", + "output_cost_per_token": 1e-05, + "output_cost_per_image_token": 3e-05, + "supported_endpoints": [ + "/v1/images/generations", + "/v1/images/edits" + ], + "supports_vision": true, + "supports_pdf_input": true + }, + "azure/low/1024-x-1024/gpt-image-1-mini": { + "input_cost_per_pixel": 2.0751953125e-09, + "litellm_provider": "azure", + "mode": "image_generation", + "output_cost_per_pixel": 0.0, + "supported_endpoints": [ + "/v1/images/generations" + ] + }, + "azure/low/1024-x-1536/gpt-image-1-mini": { + "input_cost_per_pixel": 
2.0751953125e-09, + "litellm_provider": "azure", + "mode": "image_generation", + "output_cost_per_pixel": 0.0, + "supported_endpoints": [ + "/v1/images/generations" + ] + }, + "azure/low/1536-x-1024/gpt-image-1-mini": { + "input_cost_per_pixel": 2.0345052083e-09, + "litellm_provider": "azure", + "mode": "image_generation", + "output_cost_per_pixel": 0.0, + "supported_endpoints": [ + "/v1/images/generations" + ] + }, + "azure/medium/1024-x-1024/gpt-image-1-mini": { + "input_cost_per_pixel": 8.056640625e-09, + "litellm_provider": "azure", + "mode": "image_generation", + "output_cost_per_pixel": 0.0, + "supported_endpoints": [ + "/v1/images/generations" + ] + }, + "azure/medium/1024-x-1536/gpt-image-1-mini": { + "input_cost_per_pixel": 8.056640625e-09, + "litellm_provider": "azure", + "mode": "image_generation", + "output_cost_per_pixel": 0.0, + "supported_endpoints": [ + "/v1/images/generations" + ] + }, + "azure/medium/1536-x-1024/gpt-image-1-mini": { + "input_cost_per_pixel": 7.9752604167e-09, + "litellm_provider": "azure", + "mode": "image_generation", + "output_cost_per_pixel": 0.0, + "supported_endpoints": [ + "/v1/images/generations" + ] + }, + "azure/high/1024-x-1024/gpt-image-1-mini": { + "input_cost_per_pixel": 3.173828125e-08, + "litellm_provider": "azure", + "mode": "image_generation", + "output_cost_per_pixel": 0.0, + "supported_endpoints": [ + "/v1/images/generations" + ] + }, + "azure/high/1024-x-1536/gpt-image-1-mini": { + "input_cost_per_pixel": 3.173828125e-08, + "litellm_provider": "azure", + "mode": "image_generation", + "output_cost_per_pixel": 0.0, + "supported_endpoints": [ + "/v1/images/generations" + ] + }, + "azure/high/1536-x-1024/gpt-image-1-mini": { + "input_cost_per_pixel": 3.1575520833e-08, + "litellm_provider": "azure", + "mode": "image_generation", + "output_cost_per_pixel": 0.0, + "supported_endpoints": [ + "/v1/images/generations" + ] + }, + "azure/mistral-large-2402": { + "input_cost_per_token": 8e-06, + "litellm_provider": "azure", 
+ "max_input_tokens": 32000, + "max_tokens": 32000, + "mode": "chat", + "output_cost_per_token": 2.4e-05, + "supports_function_calling": true + }, + "azure/mistral-large-latest": { + "input_cost_per_token": 8e-06, + "litellm_provider": "azure", + "max_input_tokens": 32000, + "max_tokens": 32000, + "mode": "chat", + "output_cost_per_token": 2.4e-05, + "supports_function_calling": true + }, + "azure/o1": { + "cache_read_input_token_cost": 7.5e-06, + "input_cost_per_token": 1.5e-05, + "litellm_provider": "azure", + "max_input_tokens": 200000, + "max_output_tokens": 100000, + "max_tokens": 100000, + "mode": "chat", + "output_cost_per_token": 6e-05, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/o1-2024-12-17": { + "cache_read_input_token_cost": 7.5e-06, + "input_cost_per_token": 1.5e-05, + "litellm_provider": "azure", + "max_input_tokens": 200000, + "max_output_tokens": 100000, + "max_tokens": 100000, + "mode": "chat", + "output_cost_per_token": 6e-05, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/o1-mini": { + "cache_read_input_token_cost": 6.05e-07, + "input_cost_per_token": 1.21e-06, + "litellm_provider": "azure", + "max_input_tokens": 128000, + "max_output_tokens": 65536, + "max_tokens": 65536, + "mode": "chat", + "output_cost_per_token": 4.84e-06, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_vision": false + }, + "azure/o1-mini-2024-09-12": { + "cache_read_input_token_cost": 5.5e-07, + "input_cost_per_token": 1.1e-06, + "litellm_provider": "azure", + "max_input_tokens": 128000, + "max_output_tokens": 65536, + 
"max_tokens": 65536, + "mode": "chat", + "output_cost_per_token": 4.4e-06, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_vision": false + }, + "azure/o1-preview": { + "cache_read_input_token_cost": 7.5e-06, + "input_cost_per_token": 1.5e-05, + "litellm_provider": "azure", + "max_input_tokens": 128000, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 6e-05, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_vision": false + }, + "azure/o1-preview-2024-09-12": { + "cache_read_input_token_cost": 7.5e-06, + "input_cost_per_token": 1.5e-05, + "litellm_provider": "azure", + "max_input_tokens": 128000, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 6e-05, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_vision": false + }, + "azure/o3": { + "cache_read_input_token_cost": 5e-07, + "input_cost_per_token": 2e-06, + "litellm_provider": "azure", + "max_input_tokens": 200000, + "max_output_tokens": 100000, + "max_tokens": 100000, + "mode": "chat", + "output_cost_per_token": 8e-06, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_function_calling": true, + "supports_parallel_function_calling": false, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/o3-2025-04-16": { + "deprecation_date": "2026-04-16", + "cache_read_input_token_cost": 5e-07, + 
"input_cost_per_token": 2e-06, + "litellm_provider": "azure", + "max_input_tokens": 200000, + "max_output_tokens": 100000, + "max_tokens": 100000, + "mode": "chat", + "output_cost_per_token": 8e-06, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_function_calling": true, + "supports_parallel_function_calling": false, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/o3-deep-research": { + "cache_read_input_token_cost": 2.5e-06, + "input_cost_per_token": 1e-05, + "litellm_provider": "azure", + "max_input_tokens": 200000, + "max_output_tokens": 100000, + "max_tokens": 100000, + "mode": "responses", + "output_cost_per_token": 4e-05, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_web_search": true + }, + "azure/o3-mini": { + "cache_read_input_token_cost": 5.5e-07, + "input_cost_per_token": 1.1e-06, + "litellm_provider": "azure", + "max_input_tokens": 200000, + "max_output_tokens": 100000, + "max_tokens": 100000, + "mode": "chat", + "output_cost_per_token": 4.4e-06, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": false + }, + "azure/o3-mini-2025-01-31": { + "cache_read_input_token_cost": 5.5e-07, + "input_cost_per_token": 1.1e-06, + 
"litellm_provider": "azure", + "max_input_tokens": 200000, + "max_output_tokens": 100000, + "max_tokens": 100000, + "mode": "chat", + "output_cost_per_token": 4.4e-06, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_tool_choice": true, + "supports_vision": false + }, + "azure/o3-pro": { + "input_cost_per_token": 2e-05, + "input_cost_per_token_batches": 1e-05, + "litellm_provider": "azure", + "max_input_tokens": 200000, + "max_output_tokens": 100000, + "max_tokens": 100000, + "mode": "responses", + "output_cost_per_token": 8e-05, + "output_cost_per_token_batches": 4e-05, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_function_calling": true, + "supports_parallel_function_calling": false, + "supports_prompt_caching": false, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/o3-pro-2025-06-10": { + "input_cost_per_token": 2e-05, + "input_cost_per_token_batches": 1e-05, + "litellm_provider": "azure", + "max_input_tokens": 200000, + "max_output_tokens": 100000, + "max_tokens": 100000, + "mode": "responses", + "output_cost_per_token": 8e-05, + "output_cost_per_token_batches": 4e-05, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_function_calling": true, + "supports_parallel_function_calling": false, + "supports_prompt_caching": false, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/o4-mini": { + "cache_read_input_token_cost": 2.75e-07, + "input_cost_per_token": 1.1e-06, + "litellm_provider": "azure", + "max_input_tokens": 200000, + "max_output_tokens": 
100000, + "max_tokens": 100000, + "mode": "chat", + "output_cost_per_token": 4.4e-06, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_function_calling": true, + "supports_parallel_function_calling": false, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/o4-mini-2025-04-16": { + "cache_read_input_token_cost": 2.75e-07, + "input_cost_per_token": 1.1e-06, + "litellm_provider": "azure", + "max_input_tokens": 200000, + "max_output_tokens": 100000, + "max_tokens": 100000, + "mode": "chat", + "output_cost_per_token": 4.4e-06, + "supports_function_calling": true, + "supports_parallel_function_calling": false, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/standard/1024-x-1024/dall-e-2": { + "input_cost_per_pixel": 0.0, + "litellm_provider": "azure", + "mode": "image_generation", + "output_cost_per_token": 0.0 + }, + "azure/standard/1024-x-1024/dall-e-3": { + "input_cost_per_pixel": 3.81469e-08, + "litellm_provider": "azure", + "mode": "image_generation", + "output_cost_per_token": 0.0 + }, + "azure/standard/1024-x-1792/dall-e-3": { + "input_cost_per_pixel": 4.359e-08, + "litellm_provider": "azure", + "mode": "image_generation", + "output_cost_per_token": 0.0 + }, + "azure/standard/1792-x-1024/dall-e-3": { + "input_cost_per_pixel": 4.359e-08, + "litellm_provider": "azure", + "mode": "image_generation", + "output_cost_per_token": 0.0 + }, + "azure/text-embedding-3-large": { + "input_cost_per_token": 1.3e-07, + "litellm_provider": "azure", + "max_input_tokens": 8191, + "max_tokens": 8191, + "mode": "embedding", + "output_cost_per_token": 0.0 + }, + 
"azure/text-embedding-3-small": { + "deprecation_date": "2026-04-30", + "input_cost_per_token": 2e-08, + "litellm_provider": "azure", + "max_input_tokens": 8191, + "max_tokens": 8191, + "mode": "embedding", + "output_cost_per_token": 0.0 + }, + "azure/text-embedding-ada-002": { + "input_cost_per_token": 1e-07, + "litellm_provider": "azure", + "max_input_tokens": 8191, + "max_tokens": 8191, + "mode": "embedding", + "output_cost_per_token": 0.0 + }, + "azure/speech/azure-tts": { + "input_cost_per_character": 1.5e-05, + "litellm_provider": "azure", + "mode": "audio_speech", + "source": "https://azure.microsoft.com/en-us/pricing/calculator/" + }, + "azure/speech/azure-tts-hd": { + "input_cost_per_character": 3e-05, + "litellm_provider": "azure", + "mode": "audio_speech", + "source": "https://azure.microsoft.com/en-us/pricing/calculator/" + }, + "azure/tts-1": { + "input_cost_per_character": 1.5e-05, + "litellm_provider": "azure", + "mode": "audio_speech" + }, + "azure/tts-1-hd": { + "input_cost_per_character": 3e-05, + "litellm_provider": "azure", + "mode": "audio_speech" + }, + "azure/us/gpt-4.1-2025-04-14": { + "deprecation_date": "2026-11-04", + "cache_read_input_token_cost": 5.5e-07, + "input_cost_per_token": 2.2e-06, + "input_cost_per_token_batches": 1.1e-06, + "litellm_provider": "azure", + "max_input_tokens": 1047576, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 8.8e-06, + "output_cost_per_token_batches": 4.4e-06, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_web_search": 
false + }, + "azure/us/gpt-4.1-mini-2025-04-14": { + "deprecation_date": "2026-11-04", + "cache_read_input_token_cost": 1.1e-07, + "input_cost_per_token": 4.4e-07, + "input_cost_per_token_batches": 2.2e-07, + "litellm_provider": "azure", + "max_input_tokens": 1047576, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 1.76e-06, + "output_cost_per_token_batches": 8.8e-07, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_web_search": false + }, + "azure/us/gpt-4.1-nano-2025-04-14": { + "deprecation_date": "2026-11-04", + "cache_read_input_token_cost": 2.5e-08, + "input_cost_per_token": 1.1e-07, + "input_cost_per_token_batches": 6e-08, + "litellm_provider": "azure", + "max_input_tokens": 1047576, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 4.4e-07, + "output_cost_per_token_batches": 2.2e-07, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/us/gpt-4o-2024-08-06": { + "deprecation_date": "2026-02-27", + "cache_read_input_token_cost": 1.375e-06, + "input_cost_per_token": 2.75e-06, + "litellm_provider": 
"azure", + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 1.1e-05, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/us/gpt-4o-2024-11-20": { + "deprecation_date": "2026-03-01", + "cache_creation_input_token_cost": 1.38e-06, + "input_cost_per_token": 2.75e-06, + "litellm_provider": "azure", + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 1.1e-05, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/us/gpt-4o-mini-2024-07-18": { + "cache_read_input_token_cost": 8.3e-08, + "input_cost_per_token": 1.65e-07, + "litellm_provider": "azure", + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 6.6e-07, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/us/gpt-4o-mini-realtime-preview-2024-12-17": { + "cache_creation_input_audio_token_cost": 3.3e-07, + "cache_read_input_token_cost": 3.3e-07, + "input_cost_per_audio_token": 1.1e-05, + "input_cost_per_token": 6.6e-07, + "litellm_provider": "azure", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_audio_token": 2.2e-05, + "output_cost_per_token": 2.64e-06, + "supports_audio_input": true, + "supports_audio_output": true, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_system_messages": true, + 
"supports_tool_choice": true + }, + "azure/us/gpt-4o-realtime-preview-2024-10-01": { + "cache_creation_input_audio_token_cost": 2.2e-05, + "cache_read_input_token_cost": 2.75e-06, + "input_cost_per_audio_token": 0.00011, + "input_cost_per_token": 5.5e-06, + "litellm_provider": "azure", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_audio_token": 0.00022, + "output_cost_per_token": 2.2e-05, + "supports_audio_input": true, + "supports_audio_output": true, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "azure/us/gpt-4o-realtime-preview-2024-12-17": { + "cache_read_input_audio_token_cost": 2.5e-06, + "cache_read_input_token_cost": 2.75e-06, + "input_cost_per_audio_token": 4.4e-05, + "input_cost_per_token": 5.5e-06, + "litellm_provider": "azure", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_audio_token": 8e-05, + "output_cost_per_token": 2.2e-05, + "supported_modalities": [ + "text", + "audio" + ], + "supported_output_modalities": [ + "text", + "audio" + ], + "supports_audio_input": true, + "supports_audio_output": true, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "azure/us/gpt-5-2025-08-07": { + "cache_read_input_token_cost": 1.375e-07, + "input_cost_per_token": 1.375e-06, + "litellm_provider": "azure", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 1.1e-05, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_function_calling": true, + "supports_native_streaming": true, + 
"supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/us/gpt-5-mini-2025-08-07": { + "cache_read_input_token_cost": 2.75e-08, + "input_cost_per_token": 2.75e-07, + "litellm_provider": "azure", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 2.2e-06, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/us/gpt-5-nano-2025-08-07": { + "cache_read_input_token_cost": 5.5e-09, + "input_cost_per_token": 5.5e-08, + "litellm_provider": "azure", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 4.4e-07, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/us/gpt-5.1": { + "cache_read_input_token_cost": 1.4e-07, + 
"input_cost_per_token": 1.38e-06, + "litellm_provider": "azure", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 1.1e-05, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text", + "image" + ], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_none_reasoning_effort": true + }, + "azure/us/gpt-5.1-chat": { + "cache_read_input_token_cost": 1.4e-07, + "input_cost_per_token": 1.38e-06, + "litellm_provider": "azure", + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 1.1e-05, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text", + "image" + ], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_none_reasoning_effort": true + }, + "azure/us/gpt-5.1-codex": { + "cache_read_input_token_cost": 1.4e-07, + "input_cost_per_token": 1.38e-06, + "litellm_provider": "azure", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "responses", + "output_cost_per_token": 1.1e-05, + "supported_endpoints": [ + 
"/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": false, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/us/gpt-5.1-codex-mini": { + "cache_read_input_token_cost": 2.8e-08, + "input_cost_per_token": 2.75e-07, + "litellm_provider": "azure", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "responses", + "output_cost_per_token": 2.2e-06, + "supported_endpoints": [ + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": false, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/us/o1-2024-12-17": { + "cache_read_input_token_cost": 8.25e-06, + "input_cost_per_token": 1.65e-05, + "litellm_provider": "azure", + "max_input_tokens": 200000, + "max_output_tokens": 100000, + "max_tokens": 100000, + "mode": "chat", + "output_cost_per_token": 6.6e-05, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/us/o1-mini-2024-09-12": { + "cache_read_input_token_cost": 6.05e-07, + "input_cost_per_token": 1.21e-06, + "input_cost_per_token_batches": 6.05e-07, + "litellm_provider": "azure", + "max_input_tokens": 128000, + "max_output_tokens": 65536, + "max_tokens": 65536, + 
"mode": "chat", + "output_cost_per_token": 4.84e-06, + "output_cost_per_token_batches": 2.42e-06, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": true, + "supports_vision": false + }, + "azure/us/o1-preview-2024-09-12": { + "cache_read_input_token_cost": 8.25e-06, + "input_cost_per_token": 1.65e-05, + "litellm_provider": "azure", + "max_input_tokens": 128000, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 6.6e-05, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": true, + "supports_vision": false + }, + "azure/us/o3-2025-04-16": { + "deprecation_date": "2026-04-16", + "cache_read_input_token_cost": 5.5e-07, + "input_cost_per_token": 2.2e-06, + "litellm_provider": "azure", + "max_input_tokens": 200000, + "max_output_tokens": 100000, + "max_tokens": 100000, + "mode": "chat", + "output_cost_per_token": 8.8e-06, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_function_calling": true, + "supports_parallel_function_calling": false, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/us/o3-mini-2025-01-31": { + "cache_read_input_token_cost": 6.05e-07, + "input_cost_per_token": 1.21e-06, + "input_cost_per_token_batches": 6.05e-07, + "litellm_provider": "azure", + "max_input_tokens": 200000, + "max_output_tokens": 100000, + "max_tokens": 100000, + "mode": "chat", + "output_cost_per_token": 4.84e-06, + "output_cost_per_token_batches": 2.42e-06, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_tool_choice": true, + "supports_vision": false + }, + "azure/us/o4-mini-2025-04-16": { + 
"cache_read_input_token_cost": 3.1e-07, + "input_cost_per_token": 1.21e-06, + "litellm_provider": "azure", + "max_input_tokens": 200000, + "max_output_tokens": 100000, + "max_tokens": 100000, + "mode": "chat", + "output_cost_per_token": 4.84e-06, + "supports_function_calling": true, + "supports_parallel_function_calling": false, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure/whisper-1": { + "input_cost_per_second": 0.0001, + "litellm_provider": "azure", + "mode": "audio_transcription", + "output_cost_per_second": 0.0001 + }, + "azure_ai/Cohere-embed-v3-english": { + "input_cost_per_token": 1e-07, + "litellm_provider": "azure_ai", + "max_input_tokens": 512, + "max_tokens": 512, + "mode": "embedding", + "output_cost_per_token": 0.0, + "output_vector_size": 1024, + "source": "https://azuremarketplace.microsoft.com/en-us/marketplace/apps/cohere.cohere-embed-v3-english-offer?tab=PlansAndPrice", + "supports_embedding_image_input": true + }, + "azure_ai/Cohere-embed-v3-multilingual": { + "input_cost_per_token": 1e-07, + "litellm_provider": "azure_ai", + "max_input_tokens": 512, + "max_tokens": 512, + "mode": "embedding", + "output_cost_per_token": 0.0, + "output_vector_size": 1024, + "source": "https://azuremarketplace.microsoft.com/en-us/marketplace/apps/cohere.cohere-embed-v3-english-offer?tab=PlansAndPrice", + "supports_embedding_image_input": true + }, + "azure_ai/FLUX-1.1-pro": { + "litellm_provider": "azure_ai", + "mode": "image_generation", + "output_cost_per_image": 0.04, + "source": "https://techcommunity.microsoft.com/blog/azure-ai-foundry-blog/black-forest-labs-flux-1-kontext-pro-and-flux1-1-pro-now-available-in-azure-ai-f/4434659", + "supported_endpoints": [ + "/v1/images/generations" + ] + }, + "azure_ai/FLUX.1-Kontext-pro": { + "litellm_provider": "azure_ai", + "mode": "image_generation", + "output_cost_per_image": 0.04, + "source": 
"https://azuremarketplace.microsoft.com/pt-br/marketplace/apps/cohere.cohere-embed-4-offer?tab=PlansAndPrice", + "supported_endpoints": [ + "/v1/images/generations" + ] + }, + "azure_ai/flux.2-pro": { + "litellm_provider": "azure_ai", + "mode": "image_generation", + "output_cost_per_image": 0.04, + "source": "https://ai.azure.com/explore/models/flux.2-pro/version/1/registry/azureml-blackforestlabs", + "supported_endpoints": [ + "/v1/images/generations" + ] + }, + "azure_ai/Llama-3.2-11B-Vision-Instruct": { + "input_cost_per_token": 3.7e-07, + "litellm_provider": "azure_ai", + "max_input_tokens": 128000, + "max_output_tokens": 2048, + "max_tokens": 2048, + "mode": "chat", + "output_cost_per_token": 3.7e-07, + "source": "https://azuremarketplace.microsoft.com/en/marketplace/apps/metagenai.meta-llama-3-2-11b-vision-instruct-offer?tab=Overview", + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure_ai/Llama-3.2-90B-Vision-Instruct": { + "input_cost_per_token": 2.04e-06, + "litellm_provider": "azure_ai", + "max_input_tokens": 128000, + "max_output_tokens": 2048, + "max_tokens": 2048, + "mode": "chat", + "output_cost_per_token": 2.04e-06, + "source": "https://azuremarketplace.microsoft.com/en/marketplace/apps/metagenai.meta-llama-3-2-90b-vision-instruct-offer?tab=Overview", + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure_ai/Llama-3.3-70B-Instruct": { + "input_cost_per_token": 7.1e-07, + "litellm_provider": "azure_ai", + "max_input_tokens": 128000, + "max_output_tokens": 2048, + "max_tokens": 2048, + "mode": "chat", + "output_cost_per_token": 7.1e-07, + "source": "https://azuremarketplace.microsoft.com/en/marketplace/apps/metagenai.llama-3-3-70b-instruct-offer?tab=Overview", + "supports_function_calling": true, + "supports_tool_choice": true + }, + "azure_ai/Llama-4-Maverick-17B-128E-Instruct-FP8": { + "input_cost_per_token": 1.41e-06, + "litellm_provider": 
"azure_ai", + "max_input_tokens": 1000000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 3.5e-07, + "source": "https://azure.microsoft.com/en-us/blog/introducing-the-llama-4-herd-in-azure-ai-foundry-and-azure-databricks/", + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure_ai/Llama-4-Scout-17B-16E-Instruct": { + "input_cost_per_token": 2e-07, + "litellm_provider": "azure_ai", + "max_input_tokens": 10000000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 7.8e-07, + "source": "https://azure.microsoft.com/en-us/blog/introducing-the-llama-4-herd-in-azure-ai-foundry-and-azure-databricks/", + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure_ai/Meta-Llama-3-70B-Instruct": { + "input_cost_per_token": 1.1e-06, + "litellm_provider": "azure_ai", + "max_input_tokens": 8192, + "max_output_tokens": 2048, + "max_tokens": 2048, + "mode": "chat", + "output_cost_per_token": 3.7e-07, + "supports_tool_choice": true + }, + "azure_ai/Meta-Llama-3.1-405B-Instruct": { + "input_cost_per_token": 5.33e-06, + "litellm_provider": "azure_ai", + "max_input_tokens": 128000, + "max_output_tokens": 2048, + "max_tokens": 2048, + "mode": "chat", + "output_cost_per_token": 1.6e-05, + "source": "https://azuremarketplace.microsoft.com/en-us/marketplace/apps/metagenai.meta-llama-3-1-405b-instruct-offer?tab=PlansAndPrice", + "supports_tool_choice": true + }, + "azure_ai/Meta-Llama-3.1-70B-Instruct": { + "input_cost_per_token": 2.68e-06, + "litellm_provider": "azure_ai", + "max_input_tokens": 128000, + "max_output_tokens": 2048, + "max_tokens": 2048, + "mode": "chat", + "output_cost_per_token": 3.54e-06, + "source": "https://azuremarketplace.microsoft.com/en-us/marketplace/apps/metagenai.meta-llama-3-1-70b-instruct-offer?tab=PlansAndPrice", + "supports_tool_choice": true + }, + 
"azure_ai/Meta-Llama-3.1-8B-Instruct": { + "input_cost_per_token": 3e-07, + "litellm_provider": "azure_ai", + "max_input_tokens": 128000, + "max_output_tokens": 2048, + "max_tokens": 2048, + "mode": "chat", + "output_cost_per_token": 6.1e-07, + "source": "https://azuremarketplace.microsoft.com/en-us/marketplace/apps/metagenai.meta-llama-3-1-8b-instruct-offer?tab=PlansAndPrice", + "supports_tool_choice": true + }, + "azure_ai/Phi-3-medium-128k-instruct": { + "input_cost_per_token": 1.7e-07, + "litellm_provider": "azure_ai", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 6.8e-07, + "source": "https://azure.microsoft.com/en-us/pricing/details/phi-3/", + "supports_tool_choice": true, + "supports_vision": false + }, + "azure_ai/Phi-3-medium-4k-instruct": { + "input_cost_per_token": 1.7e-07, + "litellm_provider": "azure_ai", + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 6.8e-07, + "source": "https://azure.microsoft.com/en-us/pricing/details/phi-3/", + "supports_tool_choice": true, + "supports_vision": false + }, + "azure_ai/Phi-3-mini-128k-instruct": { + "input_cost_per_token": 1.3e-07, + "litellm_provider": "azure_ai", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 5.2e-07, + "source": "https://azure.microsoft.com/en-us/pricing/details/phi-3/", + "supports_tool_choice": true, + "supports_vision": false + }, + "azure_ai/Phi-3-mini-4k-instruct": { + "input_cost_per_token": 1.3e-07, + "litellm_provider": "azure_ai", + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 5.2e-07, + "source": "https://azure.microsoft.com/en-us/pricing/details/phi-3/", + "supports_tool_choice": true, + "supports_vision": false + }, + "azure_ai/Phi-3-small-128k-instruct": { + 
"input_cost_per_token": 1.5e-07, + "litellm_provider": "azure_ai", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 6e-07, + "source": "https://azure.microsoft.com/en-us/pricing/details/phi-3/", + "supports_tool_choice": true, + "supports_vision": false + }, + "azure_ai/Phi-3-small-8k-instruct": { + "input_cost_per_token": 1.5e-07, + "litellm_provider": "azure_ai", + "max_input_tokens": 8192, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 6e-07, + "source": "https://azure.microsoft.com/en-us/pricing/details/phi-3/", + "supports_tool_choice": true, + "supports_vision": false + }, + "azure_ai/Phi-3.5-MoE-instruct": { + "input_cost_per_token": 1.6e-07, + "litellm_provider": "azure_ai", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 6.4e-07, + "source": "https://azure.microsoft.com/en-us/pricing/details/phi-3/", + "supports_tool_choice": true, + "supports_vision": false + }, + "azure_ai/Phi-3.5-mini-instruct": { + "input_cost_per_token": 1.3e-07, + "litellm_provider": "azure_ai", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 5.2e-07, + "source": "https://azure.microsoft.com/en-us/pricing/details/phi-3/", + "supports_tool_choice": true, + "supports_vision": false + }, + "azure_ai/Phi-3.5-vision-instruct": { + "input_cost_per_token": 1.3e-07, + "litellm_provider": "azure_ai", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 5.2e-07, + "source": "https://azure.microsoft.com/en-us/pricing/details/phi-3/", + "supports_tool_choice": true, + "supports_vision": true + }, + "azure_ai/Phi-4": { + "input_cost_per_token": 1.25e-07, + "litellm_provider": "azure_ai", + "max_input_tokens": 16384, + "max_output_tokens": 16384, + 
"max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 5e-07, + "source": "https://techcommunity.microsoft.com/blog/machinelearningblog/affordable-innovation-unveiling-the-pricing-of-phi-3-slms-on-models-as-a-service/4156495", + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_vision": false + }, + "azure_ai/Phi-4-mini-instruct": { + "input_cost_per_token": 7.5e-08, + "litellm_provider": "azure_ai", + "max_input_tokens": 131072, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 3e-07, + "source": "https://techcommunity.microsoft.com/blog/Azure-AI-Services-blog/announcing-new-phi-pricing-empowering-your-business-with-small-language-models/4395112", + "supports_function_calling": true + }, + "azure_ai/Phi-4-multimodal-instruct": { + "input_cost_per_audio_token": 4e-06, + "input_cost_per_token": 8e-08, + "litellm_provider": "azure_ai", + "max_input_tokens": 131072, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 3.2e-07, + "source": "https://techcommunity.microsoft.com/blog/Azure-AI-Services-blog/announcing-new-phi-pricing-empowering-your-business-with-small-language-models/4395112", + "supports_audio_input": true, + "supports_function_calling": true, + "supports_vision": true + }, + "azure_ai/Phi-4-mini-reasoning": { + "input_cost_per_token": 8e-08, + "litellm_provider": "azure_ai", + "max_input_tokens": 131072, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 3.2e-07, + "source": "https://azure.microsoft.com/en-us/pricing/details/ai-foundry-models/microsoft/", + "supports_function_calling": true + }, + "azure_ai/Phi-4-reasoning": { + "input_cost_per_token": 1.25e-07, + "litellm_provider": "azure_ai", + "max_input_tokens": 32768, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 5e-07, + "source": 
"https://azure.microsoft.com/en-us/pricing/details/ai-foundry-models/microsoft/", + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_reasoning": true + }, + "azure_ai/mistral-document-ai-2505": { + "litellm_provider": "azure_ai", + "ocr_cost_per_page": 0.003, + "mode": "ocr", + "supported_endpoints": [ + "/v1/ocr" + ], + "source": "https://devblogs.microsoft.com/foundry/whats-new-in-azure-ai-foundry-august-2025/#mistral-document-ai-(ocr)-%E2%80%94-serverless-in-foundry" + }, + "azure_ai/mistral-document-ai-2512": { + "litellm_provider": "azure_ai", + "ocr_cost_per_page": 0.003, + "mode": "ocr", + "supported_endpoints": [ + "/v1/ocr" + ], + "source": "https://azure.microsoft.com/en-us/pricing/details/ai-foundry-models/" + }, + "azure_ai/doc-intelligence/prebuilt-read": { + "litellm_provider": "azure_ai", + "ocr_cost_per_page": 0.0015, + "mode": "ocr", + "supported_endpoints": [ + "/v1/ocr" + ], + "source": "https://azure.microsoft.com/en-us/pricing/details/ai-document-intelligence/" + }, + "azure_ai/doc-intelligence/prebuilt-layout": { + "litellm_provider": "azure_ai", + "ocr_cost_per_page": 0.01, + "mode": "ocr", + "supported_endpoints": [ + "/v1/ocr" + ], + "source": "https://azure.microsoft.com/en-us/pricing/details/ai-document-intelligence/" + }, + "azure_ai/doc-intelligence/prebuilt-document": { + "litellm_provider": "azure_ai", + "ocr_cost_per_page": 0.01, + "mode": "ocr", + "supported_endpoints": [ + "/v1/ocr" + ], + "source": "https://azure.microsoft.com/en-us/pricing/details/ai-document-intelligence/" + }, + "azure_ai/MAI-DS-R1": { + "input_cost_per_token": 1.35e-06, + "litellm_provider": "azure_ai", + "max_input_tokens": 128000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 5.4e-06, + "source": "https://azure.microsoft.com/en-us/pricing/details/ai-foundry-models/microsoft/", + "supports_reasoning": true, + "supports_tool_choice": true + }, + "azure_ai/cohere-rerank-v3-english": 
{ + "input_cost_per_query": 0.002, + "input_cost_per_token": 0.0, + "litellm_provider": "azure_ai", + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_query_tokens": 2048, + "max_tokens": 4096, + "mode": "rerank", + "output_cost_per_token": 0.0 + }, + "azure_ai/cohere-rerank-v3-multilingual": { + "input_cost_per_query": 0.002, + "input_cost_per_token": 0.0, + "litellm_provider": "azure_ai", + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_query_tokens": 2048, + "max_tokens": 4096, + "mode": "rerank", + "output_cost_per_token": 0.0 + }, + "azure_ai/cohere-rerank-v3.5": { + "input_cost_per_query": 0.002, + "input_cost_per_token": 0.0, + "litellm_provider": "azure_ai", + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_query_tokens": 2048, + "max_tokens": 4096, + "mode": "rerank", + "output_cost_per_token": 0.0 + }, + "azure_ai/cohere-rerank-v4.0-pro": { + "input_cost_per_query": 0.0025, + "input_cost_per_token": 0.0, + "litellm_provider": "azure_ai", + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "max_query_tokens": 4096, + "max_tokens": 32768, + "mode": "rerank", + "output_cost_per_token": 0.0, + "source": "https://techcommunity.microsoft.com/blog/azure-ai-foundry-blog/introducing-cohere-rerank-4-0-in-microsoft-foundry/4477076" + }, + "azure_ai/cohere-rerank-v4.0-fast": { + "input_cost_per_query": 0.002, + "input_cost_per_token": 0.0, + "litellm_provider": "azure_ai", + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "max_query_tokens": 4096, + "max_tokens": 32768, + "mode": "rerank", + "output_cost_per_token": 0.0, + "source": "https://techcommunity.microsoft.com/blog/azure-ai-foundry-blog/introducing-cohere-rerank-4-0-in-microsoft-foundry/4477076" + }, + "azure_ai/deepseek-v3.2": { + "input_cost_per_token": 5.8e-07, + "litellm_provider": "azure_ai", + "max_input_tokens": 163840, + "max_output_tokens": 163840, + "max_tokens": 163840, + "mode": "chat", + "output_cost_per_token": 1.68e-06, + "source": 
"https://techcommunity.microsoft.com/blog/azure-ai-foundry-blog/introducing-deepseek-v3-2-and-deepseek-v3-2-speciale-in-microsoft-foundry/4477549", + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, + "azure_ai/deepseek-v3.2-speciale": { + "input_cost_per_token": 5.8e-07, + "litellm_provider": "azure_ai", + "max_input_tokens": 163840, + "max_output_tokens": 163840, + "max_tokens": 163840, + "mode": "chat", + "output_cost_per_token": 1.68e-06, + "source": "https://techcommunity.microsoft.com/blog/azure-ai-foundry-blog/introducing-deepseek-v3-2-and-deepseek-v3-2-speciale-in-microsoft-foundry/4477549", + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, + "azure_ai/deepseek-r1": { + "input_cost_per_token": 1.35e-06, + "litellm_provider": "azure_ai", + "max_input_tokens": 128000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 5.4e-06, + "source": "https://techcommunity.microsoft.com/blog/machinelearningblog/deepseek-r1-improved-performance-higher-limits-and-transparent-pricing/4386367", + "supports_reasoning": true, + "supports_tool_choice": true + }, + "azure_ai/deepseek-v3": { + "input_cost_per_token": 1.14e-06, + "litellm_provider": "azure_ai", + "max_input_tokens": 128000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 4.56e-06, + "source": "https://techcommunity.microsoft.com/blog/machinelearningblog/announcing-deepseek-v3-on-azure-ai-foundry-and-github/4390438", + "supports_tool_choice": true + }, + "azure_ai/deepseek-v3-0324": { + "input_cost_per_token": 1.14e-06, + "litellm_provider": "azure_ai", + "max_input_tokens": 128000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 
4.56e-06, + "source": "https://techcommunity.microsoft.com/blog/machinelearningblog/announcing-deepseek-v3-on-azure-ai-foundry-and-github/4390438", + "supports_function_calling": true, + "supports_tool_choice": true + }, + "azure_ai/embed-v-4-0": { + "input_cost_per_token": 1.2e-07, + "litellm_provider": "azure_ai", + "max_input_tokens": 128000, + "max_tokens": 128000, + "mode": "embedding", + "output_cost_per_token": 0.0, + "output_vector_size": 3072, + "source": "https://azuremarketplace.microsoft.com/pt-br/marketplace/apps/cohere.cohere-embed-4-offer?tab=PlansAndPrice", + "supported_endpoints": [ + "/v1/embeddings" + ], + "supported_modalities": [ + "text", + "image" + ], + "supports_embedding_image_input": true + }, + "azure_ai/global/grok-3": { + "input_cost_per_token": 3e-06, + "litellm_provider": "azure_ai", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 1.5e-05, + "source": "https://devblogs.microsoft.com/foundry/announcing-grok-3-and-grok-3-mini-on-azure-ai-foundry/", + "supports_function_calling": true, + "supports_response_schema": false, + "supports_tool_choice": true, + "supports_web_search": true + }, + "azure_ai/global/grok-3-mini": { + "input_cost_per_token": 2.5e-07, + "litellm_provider": "azure_ai", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 1.27e-06, + "source": "https://devblogs.microsoft.com/foundry/announcing-grok-3-and-grok-3-mini-on-azure-ai-foundry/", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_response_schema": false, + "supports_tool_choice": true, + "supports_web_search": true + }, + "azure_ai/grok-3": { + "input_cost_per_token": 3e-06, + "litellm_provider": "azure_ai", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 1.5e-05, + "source": 
"https://azure.microsoft.com/en-us/pricing/details/ai-foundry-models/grok/", + "supports_function_calling": true, + "supports_response_schema": false, + "supports_tool_choice": true, + "supports_web_search": true + }, + "azure_ai/grok-3-mini": { + "input_cost_per_token": 2.5e-07, + "litellm_provider": "azure_ai", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 1.27e-06, + "source": "https://azure.microsoft.com/en-us/pricing/details/ai-foundry-models/grok/", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_response_schema": false, + "supports_tool_choice": true, + "supports_web_search": true + }, + "azure_ai/grok-4": { + "input_cost_per_token": 3e-06, + "litellm_provider": "azure_ai", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 1.5e-05, + "source": "https://azure.microsoft.com/en-us/pricing/details/ai-foundry-models/grok/", + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_web_search": true + }, + "azure_ai/grok-4-fast-non-reasoning": { + "input_cost_per_token": 2e-07, + "output_cost_per_token": 5e-07, + "litellm_provider": "azure_ai", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_web_search": true + }, + "azure_ai/grok-4-fast-reasoning": { + "input_cost_per_token": 2e-07, + "output_cost_per_token": 5e-07, + "litellm_provider": "azure_ai", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "source": "https://azure.microsoft.com/en-us/pricing/details/ai-foundry-models/grok/", + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true, + 
"supports_web_search": true + }, + "azure_ai/grok-4-1-fast-non-reasoning": { + "input_cost_per_token": 2e-07, + "output_cost_per_token": 5e-07, + "litellm_provider": "azure_ai", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "source": "https://techcommunity.microsoft.com/t5/Azure-AI-Foundry-Blog/Grok-4-0-Goes-GA-in-Microsoft-Foundry-and-Grok-4-1-Fast-Arrives/ba-p/4497964", + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_web_search": true + }, + "azure_ai/grok-4-1-fast-reasoning": { + "input_cost_per_token": 2e-07, + "output_cost_per_token": 5e-07, + "litellm_provider": "azure_ai", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "source": "https://techcommunity.microsoft.com/t5/Azure-AI-Foundry-Blog/Grok-4-0-Goes-GA-in-Microsoft-Foundry-and-Grok-4-1-Fast-Arrives/ba-p/4497964", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_web_search": true + }, + "azure_ai/grok-code-fast-1": { + "input_cost_per_token": 2e-07, + "litellm_provider": "azure_ai", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 1.5e-06, + "source": "https://azure.microsoft.com/en-us/pricing/details/ai-foundry-models/grok/", + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_web_search": true + }, + "azure_ai/jais-30b-chat": { + "input_cost_per_token": 0.0032, + "litellm_provider": "azure_ai", + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 0.00971, + "source": "https://azure.microsoft.com/en-us/products/ai-services/ai-foundry/models/jais-30b-chat" + }, + "azure_ai/jamba-instruct": { + 
"input_cost_per_token": 5e-07, + "litellm_provider": "azure_ai", + "max_input_tokens": 70000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 7e-07, + "supports_tool_choice": true + }, + "azure_ai/kimi-k2.5": { + "input_cost_per_token": 6e-07, + "litellm_provider": "azure_ai", + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "max_tokens": 262144, + "mode": "chat", + "output_cost_per_token": 3e-06, + "source": "https://techcommunity.microsoft.com/blog/azure-ai-foundry-blog/kimi-k2-5-now-in-microsoft-foundry/4492321", + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_video_input": true, + "supports_vision": true + }, + "azure_ai/ministral-3b": { + "input_cost_per_token": 4e-08, + "litellm_provider": "azure_ai", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 4e-08, + "source": "https://azuremarketplace.microsoft.com/en/marketplace/apps/000-000.ministral-3b-2410-offer?tab=Overview", + "supports_function_calling": true, + "supports_tool_choice": true + }, + "azure_ai/mistral-large": { + "input_cost_per_token": 4e-06, + "litellm_provider": "azure_ai", + "max_input_tokens": 32000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_token": 1.2e-05, + "supports_function_calling": true, + "supports_tool_choice": true + }, + "azure_ai/mistral-large-2407": { + "input_cost_per_token": 2e-06, + "litellm_provider": "azure_ai", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 6e-06, + "source": "https://azuremarketplace.microsoft.com/en/marketplace/apps/000-000.mistral-ai-large-2407-offer?tab=Overview", + "supports_function_calling": true, + "supports_tool_choice": true + }, + "azure_ai/mistral-large-latest": { + "input_cost_per_token": 2e-06, + "litellm_provider": "azure_ai", + "max_input_tokens": 
128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 6e-06, + "source": "https://azuremarketplace.microsoft.com/en/marketplace/apps/000-000.mistral-ai-large-2407-offer?tab=Overview", + "supports_function_calling": true, + "supports_tool_choice": true + }, + "azure_ai/mistral-large-3": { + "input_cost_per_token": 5e-07, + "litellm_provider": "azure_ai", + "max_input_tokens": 256000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_token": 1.5e-06, + "source": "https://azure.microsoft.com/en-us/blog/introducing-mistral-large-3-in-microsoft-foundry-open-capable-and-ready-for-production-workloads/", + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "azure_ai/mistral-medium-2505": { + "input_cost_per_token": 4e-07, + "litellm_provider": "azure_ai", + "max_input_tokens": 131072, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_token": 2e-06, + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_tool_choice": true + }, + "azure_ai/mistral-nemo": { + "input_cost_per_token": 1.5e-07, + "litellm_provider": "azure_ai", + "max_input_tokens": 131072, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 1.5e-07, + "source": "https://azuremarketplace.microsoft.com/en/marketplace/apps/000-000.mistral-nemo-12b-2407?tab=PlansAndPrice", + "supports_function_calling": true + }, + "azure_ai/mistral-small": { + "input_cost_per_token": 1e-06, + "litellm_provider": "azure_ai", + "max_input_tokens": 32000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_token": 3e-06, + "supports_function_calling": true, + "supports_tool_choice": true + }, + "azure_ai/mistral-small-2503": { + "input_cost_per_token": 1e-07, + "litellm_provider": "azure_ai", + "max_input_tokens": 128000, + "max_output_tokens": 
128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 3e-07, + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "babbage-002": { + "input_cost_per_token": 4e-07, + "litellm_provider": "text-completion-openai", + "max_input_tokens": 16384, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "completion", + "output_cost_per_token": 4e-07 + }, + "bedrock/*/1-month-commitment/cohere.command-light-text-v14": { + "input_cost_per_second": 0.001902, + "litellm_provider": "bedrock", + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_second": 0.001902, + "supports_tool_choice": true + }, + "bedrock/*/1-month-commitment/cohere.command-text-v14": { + "input_cost_per_second": 0.011, + "litellm_provider": "bedrock", + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_second": 0.011, + "supports_tool_choice": true + }, + "bedrock/*/6-month-commitment/cohere.command-light-text-v14": { + "input_cost_per_second": 0.0011416, + "litellm_provider": "bedrock", + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_second": 0.0011416, + "supports_tool_choice": true + }, + "bedrock/*/6-month-commitment/cohere.command-text-v14": { + "input_cost_per_second": 0.0066027, + "litellm_provider": "bedrock", + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_second": 0.0066027, + "supports_tool_choice": true + }, + "bedrock/ap-northeast-1/1-month-commitment/anthropic.claude-instant-v1": { + "input_cost_per_second": 0.01475, + "litellm_provider": "bedrock", + "max_input_tokens": 100000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_second": 0.01475, + "supports_tool_choice": true + }, + 
"bedrock/ap-northeast-1/1-month-commitment/anthropic.claude-v1": { + "input_cost_per_second": 0.0455, + "litellm_provider": "bedrock", + "max_input_tokens": 100000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_second": 0.0455 + }, + "bedrock/ap-northeast-1/1-month-commitment/anthropic.claude-v2:1": { + "input_cost_per_second": 0.0455, + "litellm_provider": "bedrock", + "max_input_tokens": 100000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_second": 0.0455, + "supports_tool_choice": true + }, + "bedrock/ap-northeast-1/6-month-commitment/anthropic.claude-instant-v1": { + "input_cost_per_second": 0.008194, + "litellm_provider": "bedrock", + "max_input_tokens": 100000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_second": 0.008194, + "supports_tool_choice": true + }, + "bedrock/ap-northeast-1/6-month-commitment/anthropic.claude-v1": { + "input_cost_per_second": 0.02527, + "litellm_provider": "bedrock", + "max_input_tokens": 100000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_second": 0.02527 + }, + "bedrock/ap-northeast-1/6-month-commitment/anthropic.claude-v2:1": { + "input_cost_per_second": 0.02527, + "litellm_provider": "bedrock", + "max_input_tokens": 100000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_second": 0.02527, + "supports_tool_choice": true + }, + "bedrock/ap-northeast-1/anthropic.claude-instant-v1": { + "input_cost_per_token": 2.23e-06, + "litellm_provider": "bedrock", + "max_input_tokens": 100000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_token": 7.55e-06, + "supports_tool_choice": true + }, + "bedrock/ap-northeast-1/anthropic.claude-v1": { + "input_cost_per_token": 8e-06, + "litellm_provider": "bedrock", + "max_input_tokens": 100000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": 
"chat", + "output_cost_per_token": 2.4e-05, + "supports_tool_choice": true + }, + "bedrock/ap-northeast-1/anthropic.claude-v2:1": { + "input_cost_per_token": 8e-06, + "litellm_provider": "bedrock", + "max_input_tokens": 100000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_token": 2.4e-05, + "supports_tool_choice": true + }, + "bedrock/ap-northeast-1/deepseek.v3.2": { + "input_cost_per_token": 7.4e-07, + "litellm_provider": "bedrock", + "max_input_tokens": 163840, + "max_output_tokens": 163840, + "max_tokens": 163840, + "mode": "chat", + "output_cost_per_token": 2.22e-06, + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true, + "source": "https://aws.amazon.com/bedrock/pricing/" + }, + "bedrock/ap-northeast-1/minimax.minimax-m2.1": { + "input_cost_per_token": 3.6e-07, + "litellm_provider": "bedrock", + "max_input_tokens": 196000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 1.44e-06, + "supports_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "source": "https://aws.amazon.com/bedrock/pricing/" + }, + "bedrock/ap-northeast-1/minimax.minimax-m2.5": { + "input_cost_per_token": 3.6e-07, + "litellm_provider": "bedrock", + "max_input_tokens": 1000000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "source": "https://aws.amazon.com/bedrock/pricing/", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "output_cost_per_token": 1.44e-06 + }, + "bedrock/ap-northeast-1/moonshotai.kimi-k2-thinking": { + "input_cost_per_token": 7.3e-07, + "litellm_provider": "bedrock", + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "max_tokens": 262144, + "mode": "chat", + "output_cost_per_token": 3.03e-06, + "supports_function_calling": true, + "supports_reasoning": true + }, + 
"bedrock/ap-northeast-1/moonshotai.kimi-k2.5": { + "input_cost_per_token": 7.2e-07, + "litellm_provider": "bedrock", + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "max_tokens": 262144, + "mode": "chat", + "output_cost_per_token": 3.6e-06, + "supports_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "source": "https://aws.amazon.com/bedrock/pricing/" + }, + "bedrock/ap-northeast-1/qwen.qwen3-coder-next": { + "input_cost_per_token": 6e-07, + "litellm_provider": "bedrock", + "max_input_tokens": 262144, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 1.44e-06, + "supports_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "source": "https://aws.amazon.com/bedrock/pricing/" + }, + "bedrock/moonshotai.kimi-k2-thinking": { + "input_cost_per_token": 7.3e-07, + "litellm_provider": "bedrock", + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "max_tokens": 262144, + "mode": "chat", + "output_cost_per_token": 3.03e-06, + "supports_function_calling": true, + "supports_reasoning": true + }, + "bedrock/moonshotai.kimi-k2.5": { + "input_cost_per_token": 6e-07, + "litellm_provider": "bedrock", + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "max_tokens": 262144, + "mode": "chat", + "output_cost_per_token": 3.03e-06, + "source": "https://platform.moonshot.ai/docs/guide/kimi-k2-5-quickstart", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_video_input": true, + "supports_vision": true + }, + "bedrock/ap-south-1/meta.llama3-70b-instruct-v1:0": { + "input_cost_per_token": 3.18e-06, + "litellm_provider": "bedrock", + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 4.2e-06 + }, + 
"bedrock/ap-south-1/meta.llama3-8b-instruct-v1:0": { + "input_cost_per_token": 3.6e-07, + "litellm_provider": "bedrock", + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 7.2e-07 + }, + "bedrock/ap-south-1/deepseek.v3.2": { + "input_cost_per_token": 7.4e-07, + "litellm_provider": "bedrock", + "max_input_tokens": 163840, + "max_output_tokens": 163840, + "max_tokens": 163840, + "mode": "chat", + "output_cost_per_token": 2.22e-06, + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true, + "source": "https://aws.amazon.com/bedrock/pricing/" + }, + "bedrock/ap-south-1/minimax.minimax-m2.1": { + "input_cost_per_token": 3.6e-07, + "litellm_provider": "bedrock", + "max_input_tokens": 196000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 1.44e-06, + "supports_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "source": "https://aws.amazon.com/bedrock/pricing/" + }, + "bedrock/ap-south-1/minimax.minimax-m2.5": { + "input_cost_per_token": 3.6e-07, + "litellm_provider": "bedrock", + "max_input_tokens": 1000000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "source": "https://aws.amazon.com/bedrock/pricing/", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "output_cost_per_token": 1.44e-06 + }, + "bedrock/ap-south-1/moonshotai.kimi-k2-thinking": { + "input_cost_per_token": 7.1e-07, + "litellm_provider": "bedrock", + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "max_tokens": 262144, + "mode": "chat", + "output_cost_per_token": 2.94e-06, + "supports_function_calling": true, + "supports_reasoning": true + }, + "bedrock/ap-south-1/moonshotai.kimi-k2.5": { + "input_cost_per_token": 7.2e-07, + "litellm_provider": "bedrock", + "max_input_tokens": 
262144, + "max_output_tokens": 262144, + "max_tokens": 262144, + "mode": "chat", + "output_cost_per_token": 3.6e-06, + "supports_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "source": "https://aws.amazon.com/bedrock/pricing/" + }, + "bedrock/ap-south-1/qwen.qwen3-coder-next": { + "input_cost_per_token": 6e-07, + "litellm_provider": "bedrock", + "max_input_tokens": 262144, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 1.44e-06, + "supports_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "source": "https://aws.amazon.com/bedrock/pricing/" + }, + "bedrock/ap-southeast-2/minimax.minimax-m2.5": { + "input_cost_per_token": 3.09e-07, + "litellm_provider": "bedrock", + "max_input_tokens": 1000000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "source": "https://aws.amazon.com/bedrock/pricing/", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "output_cost_per_token": 1.236e-06 + }, + "bedrock/ap-southeast-3/deepseek.v3.2": { + "input_cost_per_token": 7.4e-07, + "litellm_provider": "bedrock", + "max_input_tokens": 163840, + "max_output_tokens": 163840, + "max_tokens": 163840, + "mode": "chat", + "output_cost_per_token": 2.22e-06, + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true, + "source": "https://aws.amazon.com/bedrock/pricing/" + }, + "bedrock/ap-southeast-3/minimax.minimax-m2.1": { + "input_cost_per_token": 3.6e-07, + "litellm_provider": "bedrock", + "max_input_tokens": 196000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 1.44e-06, + "supports_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "source": "https://aws.amazon.com/bedrock/pricing/" + 
}, + "bedrock/ap-southeast-3/minimax.minimax-m2.5": { + "input_cost_per_token": 3.6e-07, + "litellm_provider": "bedrock", + "max_input_tokens": 1000000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "source": "https://aws.amazon.com/bedrock/pricing/", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "output_cost_per_token": 1.44e-06 + }, + "bedrock/ap-southeast-3/moonshotai.kimi-k2.5": { + "input_cost_per_token": 7.2e-07, + "litellm_provider": "bedrock", + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "max_tokens": 262144, + "mode": "chat", + "output_cost_per_token": 3.6e-06, + "supports_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "source": "https://aws.amazon.com/bedrock/pricing/" + }, + "bedrock/ap-southeast-3/qwen.qwen3-coder-next": { + "input_cost_per_token": 6e-07, + "litellm_provider": "bedrock", + "max_input_tokens": 262144, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 1.44e-06, + "supports_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "source": "https://aws.amazon.com/bedrock/pricing/" + }, + "bedrock/ca-central-1/meta.llama3-70b-instruct-v1:0": { + "input_cost_per_token": 3.05e-06, + "litellm_provider": "bedrock", + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 4.03e-06 + }, + "bedrock/ca-central-1/meta.llama3-8b-instruct-v1:0": { + "input_cost_per_token": 3.5e-07, + "litellm_provider": "bedrock", + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 6.9e-07 + }, + "bedrock/eu-north-1/deepseek.v3.2": { + "input_cost_per_token": 7.4e-07, + "litellm_provider": "bedrock", + "max_input_tokens": 163840, + 
"max_output_tokens": 163840, + "max_tokens": 163840, + "mode": "chat", + "output_cost_per_token": 2.22e-06, + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true, + "source": "https://aws.amazon.com/bedrock/pricing/" + }, + "bedrock/eu-north-1/minimax.minimax-m2.1": { + "input_cost_per_token": 3.6e-07, + "litellm_provider": "bedrock", + "max_input_tokens": 196000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 1.44e-06, + "supports_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "source": "https://aws.amazon.com/bedrock/pricing/" + }, + "bedrock/eu-north-1/minimax.minimax-m2.5": { + "input_cost_per_token": 3.6e-07, + "litellm_provider": "bedrock", + "max_input_tokens": 1000000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "source": "https://aws.amazon.com/bedrock/pricing/", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "output_cost_per_token": 1.44e-06 + }, + "bedrock/eu-north-1/moonshotai.kimi-k2.5": { + "input_cost_per_token": 7.2e-07, + "litellm_provider": "bedrock", + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "max_tokens": 262144, + "mode": "chat", + "output_cost_per_token": 3.6e-06, + "supports_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "source": "https://aws.amazon.com/bedrock/pricing/" + }, + "bedrock/eu-central-1/1-month-commitment/anthropic.claude-instant-v1": { + "input_cost_per_second": 0.01635, + "litellm_provider": "bedrock", + "max_input_tokens": 100000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_second": 0.01635, + "supports_tool_choice": true + }, + "bedrock/eu-central-1/1-month-commitment/anthropic.claude-v1": { + "input_cost_per_second": 0.0415, + 
"litellm_provider": "bedrock", + "max_input_tokens": 100000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_second": 0.0415 + }, + "bedrock/eu-central-1/1-month-commitment/anthropic.claude-v2:1": { + "input_cost_per_second": 0.0415, + "litellm_provider": "bedrock", + "max_input_tokens": 100000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_second": 0.0415, + "supports_tool_choice": true + }, + "bedrock/eu-central-1/6-month-commitment/anthropic.claude-instant-v1": { + "input_cost_per_second": 0.009083, + "litellm_provider": "bedrock", + "max_input_tokens": 100000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_second": 0.009083, + "supports_tool_choice": true + }, + "bedrock/eu-central-1/6-month-commitment/anthropic.claude-v1": { + "input_cost_per_second": 0.02305, + "litellm_provider": "bedrock", + "max_input_tokens": 100000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_second": 0.02305 + }, + "bedrock/eu-central-1/6-month-commitment/anthropic.claude-v2:1": { + "input_cost_per_second": 0.02305, + "litellm_provider": "bedrock", + "max_input_tokens": 100000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_second": 0.02305, + "supports_tool_choice": true + }, + "bedrock/eu-central-1/anthropic.claude-instant-v1": { + "input_cost_per_token": 2.48e-06, + "litellm_provider": "bedrock", + "max_input_tokens": 100000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_token": 8.38e-06, + "supports_tool_choice": true + }, + "bedrock/eu-central-1/anthropic.claude-v1": { + "input_cost_per_token": 8e-06, + "litellm_provider": "bedrock", + "max_input_tokens": 100000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_token": 2.4e-05 + }, + "bedrock/eu-central-1/anthropic.claude-v2:1": { + 
"input_cost_per_token": 8e-06, + "litellm_provider": "bedrock", + "max_input_tokens": 100000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_token": 2.4e-05, + "supports_tool_choice": true + }, + "bedrock/eu-central-1/minimax.minimax-m2.1": { + "input_cost_per_token": 3.6e-07, + "litellm_provider": "bedrock", + "max_input_tokens": 196000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 1.44e-06, + "supports_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "source": "https://aws.amazon.com/bedrock/pricing/" + }, + "bedrock/eu-central-1/minimax.minimax-m2.5": { + "input_cost_per_token": 3.6e-07, + "litellm_provider": "bedrock", + "max_input_tokens": 1000000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "source": "https://aws.amazon.com/bedrock/pricing/", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "output_cost_per_token": 1.44e-06 + }, + "bedrock/eu-central-1/qwen.qwen3-coder-next": { + "input_cost_per_token": 6e-07, + "litellm_provider": "bedrock", + "max_input_tokens": 262144, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 1.44e-06, + "supports_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "source": "https://aws.amazon.com/bedrock/pricing/" + }, + "bedrock/eu-west-1/meta.llama3-70b-instruct-v1:0": { + "input_cost_per_token": 2.86e-06, + "litellm_provider": "bedrock", + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 3.78e-06 + }, + "bedrock/eu-west-1/meta.llama3-8b-instruct-v1:0": { + "input_cost_per_token": 3.2e-07, + "litellm_provider": "bedrock", + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + 
"output_cost_per_token": 6.5e-07 + }, + "bedrock/eu-west-1/minimax.minimax-m2.1": { + "input_cost_per_token": 3.6e-07, + "litellm_provider": "bedrock", + "max_input_tokens": 196000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 1.44e-06, + "supports_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "source": "https://aws.amazon.com/bedrock/pricing/" + }, + "bedrock/eu-west-1/minimax.minimax-m2.5": { + "input_cost_per_token": 3.6e-07, + "litellm_provider": "bedrock", + "max_input_tokens": 1000000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "source": "https://aws.amazon.com/bedrock/pricing/", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "output_cost_per_token": 1.44e-06 + }, + "bedrock/eu-west-1/qwen.qwen3-coder-next": { + "input_cost_per_token": 6e-07, + "litellm_provider": "bedrock", + "max_input_tokens": 262144, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 1.44e-06, + "supports_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "source": "https://aws.amazon.com/bedrock/pricing/" + }, + "bedrock/eu-west-2/meta.llama3-70b-instruct-v1:0": { + "input_cost_per_token": 3.45e-06, + "litellm_provider": "bedrock", + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 4.55e-06 + }, + "bedrock/eu-west-2/meta.llama3-8b-instruct-v1:0": { + "input_cost_per_token": 3.9e-07, + "litellm_provider": "bedrock", + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 7.8e-07 + }, + "bedrock/eu-west-2/minimax.minimax-m2.1": { + "input_cost_per_token": 4.7e-07, + "litellm_provider": "bedrock", + "max_input_tokens": 196000, + "max_output_tokens": 
8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 1.86e-06, + "supports_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "source": "https://aws.amazon.com/bedrock/pricing/" + }, + "bedrock/eu-west-2/minimax.minimax-m2.5": { + "input_cost_per_token": 4.7e-07, + "litellm_provider": "bedrock", + "max_input_tokens": 1000000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "source": "https://aws.amazon.com/bedrock/pricing/", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "output_cost_per_token": 1.86e-06 + }, + "bedrock/eu-west-2/qwen.qwen3-coder-next": { + "input_cost_per_token": 7.8e-07, + "litellm_provider": "bedrock", + "max_input_tokens": 262144, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 1.86e-06, + "supports_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "source": "https://aws.amazon.com/bedrock/pricing/" + }, + "bedrock/eu-west-3/mistral.mistral-7b-instruct-v0:2": { + "input_cost_per_token": 2e-07, + "litellm_provider": "bedrock", + "max_input_tokens": 32000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_token": 2.6e-07, + "supports_tool_choice": true + }, + "bedrock/eu-west-3/mistral.mistral-large-2402-v1:0": { + "input_cost_per_token": 1.04e-05, + "litellm_provider": "bedrock", + "max_input_tokens": 32000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_token": 3.12e-05, + "supports_function_calling": true + }, + "bedrock/eu-west-3/mistral.mixtral-8x7b-instruct-v0:1": { + "input_cost_per_token": 5.9e-07, + "litellm_provider": "bedrock", + "max_input_tokens": 32000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_token": 9.1e-07, + "supports_tool_choice": true + 
}, + "bedrock/eu-south-1/minimax.minimax-m2.1": { + "input_cost_per_token": 3.6e-07, + "litellm_provider": "bedrock", + "max_input_tokens": 196000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 1.44e-06, + "supports_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "source": "https://aws.amazon.com/bedrock/pricing/" + }, + "bedrock/eu-south-1/minimax.minimax-m2.5": { + "input_cost_per_token": 3.6e-07, + "litellm_provider": "bedrock", + "max_input_tokens": 1000000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "source": "https://aws.amazon.com/bedrock/pricing/", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "output_cost_per_token": 1.44e-06 + }, + "bedrock/eu-south-1/qwen.qwen3-coder-next": { + "input_cost_per_token": 6e-07, + "litellm_provider": "bedrock", + "max_input_tokens": 262144, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 1.44e-06, + "supports_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "source": "https://aws.amazon.com/bedrock/pricing/" + }, + "bedrock/invoke/anthropic.claude-3-5-sonnet-20240620-v1:0": { + "input_cost_per_token": 3e-06, + "litellm_provider": "bedrock", + "max_input_tokens": 200000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "metadata": { + "notes": "Anthropic via Invoke route does not currently support pdf input." 
+ }, + "mode": "chat", + "output_cost_per_token": 1.5e-05, + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "cache_read_input_token_cost": 3e-07, + "cache_creation_input_token_cost": 3.75e-06 + }, + "bedrock/sa-east-1/meta.llama3-70b-instruct-v1:0": { + "input_cost_per_token": 4.45e-06, + "litellm_provider": "bedrock", + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 5.88e-06 + }, + "bedrock/sa-east-1/meta.llama3-8b-instruct-v1:0": { + "input_cost_per_token": 5e-07, + "litellm_provider": "bedrock", + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 1.01e-06 + }, + "bedrock/sa-east-1/deepseek.v3.2": { + "input_cost_per_token": 7.4e-07, + "litellm_provider": "bedrock", + "max_input_tokens": 163840, + "max_output_tokens": 163840, + "max_tokens": 163840, + "mode": "chat", + "output_cost_per_token": 2.22e-06, + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true, + "source": "https://aws.amazon.com/bedrock/pricing/" + }, + "bedrock/sa-east-1/minimax.minimax-m2.1": { + "input_cost_per_token": 3.6e-07, + "litellm_provider": "bedrock", + "max_input_tokens": 196000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 1.44e-06, + "supports_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "source": "https://aws.amazon.com/bedrock/pricing/" + }, + "bedrock/sa-east-1/minimax.minimax-m2.5": { + "input_cost_per_token": 3.6e-07, + "litellm_provider": "bedrock", + "max_input_tokens": 1000000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "source": "https://aws.amazon.com/bedrock/pricing/", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_system_messages": true, + 
"supports_tool_choice": true, + "output_cost_per_token": 1.44e-06 + }, + "bedrock/sa-east-1/moonshotai.kimi-k2-thinking": { + "input_cost_per_token": 7.3e-07, + "litellm_provider": "bedrock", + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "max_tokens": 262144, + "mode": "chat", + "output_cost_per_token": 3.03e-06, + "supports_function_calling": true, + "supports_reasoning": true + }, + "bedrock/sa-east-1/moonshotai.kimi-k2.5": { + "input_cost_per_token": 7.2e-07, + "litellm_provider": "bedrock", + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "max_tokens": 262144, + "mode": "chat", + "output_cost_per_token": 3.6e-06, + "supports_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "source": "https://aws.amazon.com/bedrock/pricing/" + }, + "bedrock/sa-east-1/qwen.qwen3-coder-next": { + "input_cost_per_token": 6e-07, + "litellm_provider": "bedrock", + "max_input_tokens": 262144, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 1.44e-06, + "supports_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "source": "https://aws.amazon.com/bedrock/pricing/" + }, + "bedrock/us-east-1/1-month-commitment/anthropic.claude-instant-v1": { + "input_cost_per_second": 0.011, + "litellm_provider": "bedrock", + "max_input_tokens": 100000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_second": 0.011, + "supports_tool_choice": true + }, + "bedrock/us-east-1/1-month-commitment/anthropic.claude-v1": { + "input_cost_per_second": 0.0175, + "litellm_provider": "bedrock", + "max_input_tokens": 100000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_second": 0.0175 + }, + "bedrock/us-east-1/1-month-commitment/anthropic.claude-v2:1": { + "input_cost_per_second": 0.0175, + "litellm_provider": "bedrock", + "max_input_tokens": 
100000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_second": 0.0175, + "supports_tool_choice": true + }, + "bedrock/us-east-1/6-month-commitment/anthropic.claude-instant-v1": { + "input_cost_per_second": 0.00611, + "litellm_provider": "bedrock", + "max_input_tokens": 100000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_second": 0.00611, + "supports_tool_choice": true + }, + "bedrock/us-east-1/6-month-commitment/anthropic.claude-v1": { + "input_cost_per_second": 0.00972, + "litellm_provider": "bedrock", + "max_input_tokens": 100000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_second": 0.00972 + }, + "bedrock/us-east-1/6-month-commitment/anthropic.claude-v2:1": { + "input_cost_per_second": 0.00972, + "litellm_provider": "bedrock", + "max_input_tokens": 100000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_second": 0.00972, + "supports_tool_choice": true + }, + "bedrock/us-east-1/anthropic.claude-instant-v1": { + "input_cost_per_token": 8e-07, + "litellm_provider": "bedrock", + "max_input_tokens": 100000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_token": 2.4e-06, + "supports_tool_choice": true + }, + "bedrock/us-east-1/anthropic.claude-v1": { + "input_cost_per_token": 8e-06, + "litellm_provider": "bedrock", + "max_input_tokens": 100000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_token": 2.4e-05, + "supports_tool_choice": true + }, + "bedrock/us-east-1/anthropic.claude-v2:1": { + "input_cost_per_token": 8e-06, + "litellm_provider": "bedrock", + "max_input_tokens": 100000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_token": 2.4e-05, + "supports_tool_choice": true + }, + "bedrock/us-east-1/meta.llama3-70b-instruct-v1:0": { + "input_cost_per_token": 2.65e-06, + 
"litellm_provider": "bedrock", + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 3.5e-06 + }, + "bedrock/us-east-1/meta.llama3-8b-instruct-v1:0": { + "input_cost_per_token": 3e-07, + "litellm_provider": "bedrock", + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 6e-07 + }, + "bedrock/us-east-1/mistral.mistral-7b-instruct-v0:2": { + "input_cost_per_token": 1.5e-07, + "litellm_provider": "bedrock", + "max_input_tokens": 32000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_token": 2e-07, + "supports_tool_choice": true + }, + "bedrock/us-east-1/mistral.mistral-large-2402-v1:0": { + "input_cost_per_token": 8e-06, + "litellm_provider": "bedrock", + "max_input_tokens": 32000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_token": 2.4e-05, + "supports_function_calling": true + }, + "bedrock/us-east-1/mistral.mixtral-8x7b-instruct-v0:1": { + "input_cost_per_token": 4.5e-07, + "litellm_provider": "bedrock", + "max_input_tokens": 32000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_token": 7e-07, + "supports_tool_choice": true + }, + "bedrock/us-east-1/deepseek.v3.2": { + "input_cost_per_token": 6.2e-07, + "litellm_provider": "bedrock", + "max_input_tokens": 163840, + "max_output_tokens": 163840, + "max_tokens": 163840, + "mode": "chat", + "output_cost_per_token": 1.85e-06, + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true, + "source": "https://aws.amazon.com/bedrock/pricing/" + }, + "bedrock/us-east-1/minimax.minimax-m2.1": { + "input_cost_per_token": 3e-07, + "litellm_provider": "bedrock", + "max_input_tokens": 196000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 1.2e-06, + "supports_function_calling": true, 
+ "supports_system_messages": true, + "supports_tool_choice": true, + "source": "https://aws.amazon.com/bedrock/pricing/" + }, + "bedrock/us-east-1/minimax.minimax-m2.5": { + "input_cost_per_token": 3e-07, + "litellm_provider": "bedrock", + "max_input_tokens": 1000000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "source": "https://aws.amazon.com/bedrock/pricing/", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "output_cost_per_token": 1.2e-06 + }, + "bedrock/us-east-1/moonshotai.kimi-k2-thinking": { + "input_cost_per_token": 6e-07, + "litellm_provider": "bedrock", + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "max_tokens": 262144, + "mode": "chat", + "output_cost_per_token": 2.5e-06, + "supports_function_calling": true, + "supports_reasoning": true + }, + "bedrock/us-east-1/moonshotai.kimi-k2.5": { + "input_cost_per_token": 6e-07, + "litellm_provider": "bedrock", + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "max_tokens": 262144, + "mode": "chat", + "output_cost_per_token": 3e-06, + "supports_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "source": "https://aws.amazon.com/bedrock/pricing/" + }, + "bedrock/us-east-1/qwen.qwen3-coder-next": { + "input_cost_per_token": 5e-07, + "litellm_provider": "bedrock", + "max_input_tokens": 262144, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 1.2e-06, + "supports_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "source": "https://aws.amazon.com/bedrock/pricing/" + }, + "bedrock/us-east-2/deepseek.v3.2": { + "input_cost_per_token": 6.2e-07, + "litellm_provider": "bedrock", + "max_input_tokens": 163840, + "max_output_tokens": 163840, + "max_tokens": 163840, + "mode": "chat", + "output_cost_per_token": 1.85e-06, + 
"supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true, + "source": "https://aws.amazon.com/bedrock/pricing/" + }, + "bedrock/us-east-2/minimax.minimax-m2.1": { + "input_cost_per_token": 3e-07, + "litellm_provider": "bedrock", + "max_input_tokens": 196000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 1.2e-06, + "supports_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "source": "https://aws.amazon.com/bedrock/pricing/" + }, + "bedrock/us-east-2/minimax.minimax-m2.5": { + "input_cost_per_token": 3e-07, + "litellm_provider": "bedrock", + "max_input_tokens": 1000000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "source": "https://aws.amazon.com/bedrock/pricing/", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "output_cost_per_token": 1.2e-06 + }, + "bedrock/us-east-2/moonshotai.kimi-k2-thinking": { + "input_cost_per_token": 6e-07, + "litellm_provider": "bedrock", + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "max_tokens": 262144, + "mode": "chat", + "output_cost_per_token": 2.5e-06, + "supports_function_calling": true, + "supports_reasoning": true + }, + "bedrock/us-east-2/moonshotai.kimi-k2.5": { + "input_cost_per_token": 6e-07, + "litellm_provider": "bedrock", + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "max_tokens": 262144, + "mode": "chat", + "output_cost_per_token": 3e-06, + "supports_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "source": "https://aws.amazon.com/bedrock/pricing/" + }, + "bedrock/us-east-2/qwen.qwen3-coder-next": { + "input_cost_per_token": 5e-07, + "litellm_provider": "bedrock", + "max_input_tokens": 262144, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + 
"output_cost_per_token": 1.2e-06, + "supports_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "source": "https://aws.amazon.com/bedrock/pricing/" + }, + "bedrock/us-gov-east-1/amazon.nova-pro-v1:0": { + "input_cost_per_token": 9.6e-07, + "litellm_provider": "bedrock", + "max_input_tokens": 300000, + "max_output_tokens": 10000, + "max_tokens": 10000, + "mode": "chat", + "output_cost_per_token": 3.84e-06, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_vision": true + }, + "bedrock/us-gov-east-1/amazon.titan-embed-text-v1": { + "input_cost_per_token": 1e-07, + "litellm_provider": "bedrock", + "max_input_tokens": 8192, + "max_tokens": 8192, + "mode": "embedding", + "output_cost_per_token": 0.0, + "output_vector_size": 1536 + }, + "bedrock/us-gov-east-1/amazon.titan-embed-text-v2:0": { + "input_cost_per_token": 2e-07, + "litellm_provider": "bedrock", + "max_input_tokens": 8192, + "max_tokens": 8192, + "mode": "embedding", + "output_cost_per_token": 0.0, + "output_vector_size": 1024 + }, + "bedrock/us-gov-east-1/amazon.titan-text-express-v1": { + "input_cost_per_token": 1.3e-06, + "litellm_provider": "bedrock", + "max_input_tokens": 42000, + "max_output_tokens": 8000, + "max_tokens": 8000, + "mode": "chat", + "output_cost_per_token": 1.7e-06 + }, + "bedrock/us-gov-east-1/amazon.titan-text-lite-v1": { + "input_cost_per_token": 3e-07, + "litellm_provider": "bedrock", + "max_input_tokens": 42000, + "max_output_tokens": 4000, + "max_tokens": 4000, + "mode": "chat", + "output_cost_per_token": 4e-07 + }, + "bedrock/us-gov-east-1/amazon.titan-text-premier-v1:0": { + "input_cost_per_token": 5e-07, + "litellm_provider": "bedrock", + "max_input_tokens": 42000, + "max_output_tokens": 32000, + "max_tokens": 32000, + "mode": "chat", + "output_cost_per_token": 1.5e-06 + }, + 
"bedrock/us-gov-east-1/anthropic.claude-3-5-sonnet-20240620-v1:0": { + "input_cost_per_token": 3.6e-06, + "litellm_provider": "bedrock", + "max_input_tokens": 200000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 1.8e-05, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "cache_read_input_token_cost": 3.6e-07, + "cache_creation_input_token_cost": 4.5e-06 + }, + "bedrock/us-gov-east-1/anthropic.claude-3-haiku-20240307-v1:0": { + "input_cost_per_token": 3e-07, + "litellm_provider": "bedrock", + "max_input_tokens": 200000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 1.5e-06, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "cache_read_input_token_cost": 3e-08, + "cache_creation_input_token_cost": 3.75e-07 + }, + "bedrock/us-gov-east-1/anthropic.claude-sonnet-4-5-20250929-v1:0": { + "cache_creation_input_token_cost": 4.125e-06, + "cache_read_input_token_cost": 3.3e-07, + "input_cost_per_token": 3.3e-06, + "litellm_provider": "bedrock", + "max_input_tokens": 200000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 1.65e-05, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_native_structured_output": true + }, + "bedrock/us-gov-east-1/claude-sonnet-4-5-20250929-v1:0": { + "cache_creation_input_token_cost": 4.125e-06, + "cache_read_input_token_cost": 3.3e-07, + "input_cost_per_token": 3.3e-06, + "litellm_provider": "bedrock", + 
"max_input_tokens": 200000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 1.65e-05, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_native_structured_output": true + }, + "bedrock/us-gov-east-1/meta.llama3-70b-instruct-v1:0": { + "input_cost_per_token": 2.65e-06, + "litellm_provider": "bedrock", + "max_input_tokens": 8000, + "max_output_tokens": 2048, + "max_tokens": 2048, + "mode": "chat", + "output_cost_per_token": 3.5e-06, + "supports_pdf_input": true + }, + "bedrock/us-gov-east-1/meta.llama3-8b-instruct-v1:0": { + "input_cost_per_token": 3e-07, + "litellm_provider": "bedrock", + "max_input_tokens": 8000, + "max_output_tokens": 2048, + "max_tokens": 2048, + "mode": "chat", + "output_cost_per_token": 2.65e-06, + "supports_pdf_input": true + }, + "bedrock/us-gov-west-1/amazon.nova-pro-v1:0": { + "input_cost_per_token": 9.6e-07, + "litellm_provider": "bedrock", + "max_input_tokens": 300000, + "max_output_tokens": 10000, + "max_tokens": 10000, + "mode": "chat", + "output_cost_per_token": 3.84e-06, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_vision": true + }, + "bedrock/us-gov-west-1/amazon.titan-embed-text-v1": { + "input_cost_per_token": 1e-07, + "litellm_provider": "bedrock", + "max_input_tokens": 8192, + "max_tokens": 8192, + "mode": "embedding", + "output_cost_per_token": 0.0, + "output_vector_size": 1536 + }, + "bedrock/us-gov-west-1/amazon.titan-embed-text-v2:0": { + "input_cost_per_token": 2e-07, + "litellm_provider": "bedrock", + "max_input_tokens": 8192, + "max_tokens": 8192, + "mode": "embedding", + "output_cost_per_token": 0.0, + 
"output_vector_size": 1024 + }, + "bedrock/us-gov-west-1/amazon.titan-text-express-v1": { + "input_cost_per_token": 1.3e-06, + "litellm_provider": "bedrock", + "max_input_tokens": 42000, + "max_output_tokens": 8000, + "max_tokens": 8000, + "mode": "chat", + "output_cost_per_token": 1.7e-06 + }, + "bedrock/us-gov-west-1/amazon.titan-text-lite-v1": { + "input_cost_per_token": 3e-07, + "litellm_provider": "bedrock", + "max_input_tokens": 42000, + "max_output_tokens": 4000, + "max_tokens": 4000, + "mode": "chat", + "output_cost_per_token": 4e-07 + }, + "bedrock/us-gov-west-1/amazon.titan-text-premier-v1:0": { + "input_cost_per_token": 5e-07, + "litellm_provider": "bedrock", + "max_input_tokens": 42000, + "max_output_tokens": 32000, + "max_tokens": 32000, + "mode": "chat", + "output_cost_per_token": 1.5e-06 + }, + "bedrock/us-gov-west-1/anthropic.claude-3-7-sonnet-20250219-v1:0": { + "cache_creation_input_token_cost": 4.5e-06, + "cache_read_input_token_cost": 3.6e-07, + "input_cost_per_token": 3.6e-06, + "litellm_provider": "bedrock", + "max_input_tokens": 200000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 1.8e-05, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "bedrock/us-gov-west-1/anthropic.claude-3-5-sonnet-20240620-v1:0": { + "input_cost_per_token": 3.6e-06, + "litellm_provider": "bedrock", + "max_input_tokens": 200000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 1.8e-05, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "cache_read_input_token_cost": 3.6e-07, + "cache_creation_input_token_cost": 
4.5e-06 + }, + "bedrock/us-gov-west-1/anthropic.claude-3-haiku-20240307-v1:0": { + "input_cost_per_token": 3e-07, + "litellm_provider": "bedrock", + "max_input_tokens": 200000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 1.5e-06, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "cache_read_input_token_cost": 3e-08, + "cache_creation_input_token_cost": 3.75e-07 + }, + "bedrock/us-gov-west-1/anthropic.claude-sonnet-4-5-20250929-v1:0": { + "cache_creation_input_token_cost": 4.125e-06, + "cache_read_input_token_cost": 3.3e-07, + "input_cost_per_token": 3.3e-06, + "litellm_provider": "bedrock", + "max_input_tokens": 200000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 1.65e-05, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_native_structured_output": true + }, + "bedrock/us-gov-west-1/claude-sonnet-4-5-20250929-v1:0": { + "cache_creation_input_token_cost": 4.125e-06, + "cache_read_input_token_cost": 3.3e-07, + "input_cost_per_token": 3.3e-06, + "litellm_provider": "bedrock", + "max_input_tokens": 200000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 1.65e-05, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_native_structured_output": true + }, + 
"bedrock/us-gov-west-1/meta.llama3-70b-instruct-v1:0": { + "input_cost_per_token": 2.65e-06, + "litellm_provider": "bedrock", + "max_input_tokens": 8000, + "max_output_tokens": 2048, + "max_tokens": 2048, + "mode": "chat", + "output_cost_per_token": 3.5e-06, + "supports_pdf_input": true + }, + "bedrock/us-gov-west-1/meta.llama3-8b-instruct-v1:0": { + "input_cost_per_token": 3e-07, + "litellm_provider": "bedrock", + "max_input_tokens": 8000, + "max_output_tokens": 2048, + "max_tokens": 2048, + "mode": "chat", + "output_cost_per_token": 2.65e-06, + "supports_pdf_input": true + }, + "bedrock/us-west-1/meta.llama3-70b-instruct-v1:0": { + "input_cost_per_token": 2.65e-06, + "litellm_provider": "bedrock", + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 3.5e-06 + }, + "bedrock/us-west-1/meta.llama3-8b-instruct-v1:0": { + "input_cost_per_token": 3e-07, + "litellm_provider": "bedrock", + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 6e-07 + }, + "bedrock/us-west-2/1-month-commitment/anthropic.claude-instant-v1": { + "input_cost_per_second": 0.011, + "litellm_provider": "bedrock", + "max_input_tokens": 100000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_second": 0.011, + "supports_tool_choice": true + }, + "bedrock/us-west-2/1-month-commitment/anthropic.claude-v1": { + "input_cost_per_second": 0.0175, + "litellm_provider": "bedrock", + "max_input_tokens": 100000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_second": 0.0175 + }, + "bedrock/us-west-2/1-month-commitment/anthropic.claude-v2:1": { + "input_cost_per_second": 0.0175, + "litellm_provider": "bedrock", + "max_input_tokens": 100000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_second": 0.0175, + "supports_tool_choice": true + }, + 
"bedrock/us-west-2/6-month-commitment/anthropic.claude-instant-v1": { + "input_cost_per_second": 0.00611, + "litellm_provider": "bedrock", + "max_input_tokens": 100000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_second": 0.00611, + "supports_tool_choice": true + }, + "bedrock/us-west-2/6-month-commitment/anthropic.claude-v1": { + "input_cost_per_second": 0.00972, + "litellm_provider": "bedrock", + "max_input_tokens": 100000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_second": 0.00972 + }, + "bedrock/us-west-2/6-month-commitment/anthropic.claude-v2:1": { + "input_cost_per_second": 0.00972, + "litellm_provider": "bedrock", + "max_input_tokens": 100000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_second": 0.00972, + "supports_tool_choice": true + }, + "bedrock/us-west-2/anthropic.claude-instant-v1": { + "input_cost_per_token": 8e-07, + "litellm_provider": "bedrock", + "max_input_tokens": 100000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_token": 2.4e-06, + "supports_tool_choice": true + }, + "bedrock/us-west-2/anthropic.claude-v1": { + "input_cost_per_token": 8e-06, + "litellm_provider": "bedrock", + "max_input_tokens": 100000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_token": 2.4e-05, + "supports_tool_choice": true + }, + "bedrock/us-west-2/anthropic.claude-v2:1": { + "input_cost_per_token": 8e-06, + "litellm_provider": "bedrock", + "max_input_tokens": 100000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_token": 2.4e-05, + "supports_tool_choice": true + }, + "bedrock/us-west-2/mistral.mistral-7b-instruct-v0:2": { + "input_cost_per_token": 1.5e-07, + "litellm_provider": "bedrock", + "max_input_tokens": 32000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_token": 
2e-07, + "supports_tool_choice": true + }, + "bedrock/us-west-2/mistral.mistral-large-2402-v1:0": { + "input_cost_per_token": 8e-06, + "litellm_provider": "bedrock", + "max_input_tokens": 32000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_token": 2.4e-05, + "supports_function_calling": true + }, + "bedrock/us-west-2/mistral.mixtral-8x7b-instruct-v0:1": { + "input_cost_per_token": 4.5e-07, + "litellm_provider": "bedrock", + "max_input_tokens": 32000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_token": 7e-07, + "supports_tool_choice": true + }, + "bedrock/us-west-2/deepseek.v3.2": { + "input_cost_per_token": 6.2e-07, + "litellm_provider": "bedrock", + "max_input_tokens": 163840, + "max_output_tokens": 163840, + "max_tokens": 163840, + "mode": "chat", + "output_cost_per_token": 1.85e-06, + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true, + "source": "https://aws.amazon.com/bedrock/pricing/" + }, + "bedrock/us-west-2/minimax.minimax-m2.1": { + "input_cost_per_token": 3e-07, + "litellm_provider": "bedrock", + "max_input_tokens": 196000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 1.2e-06, + "supports_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "source": "https://aws.amazon.com/bedrock/pricing/" + }, + "bedrock/us-west-2/minimax.minimax-m2.5": { + "input_cost_per_token": 3e-07, + "litellm_provider": "bedrock", + "max_input_tokens": 1000000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "source": "https://aws.amazon.com/bedrock/pricing/", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "output_cost_per_token": 1.2e-06 + }, + "bedrock/us-west-2/moonshotai.kimi-k2-thinking": { + "input_cost_per_token": 6e-07, + 
"litellm_provider": "bedrock", + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "max_tokens": 262144, + "mode": "chat", + "output_cost_per_token": 2.5e-06, + "supports_function_calling": true, + "supports_reasoning": true + }, + "bedrock/us-west-2/moonshotai.kimi-k2.5": { + "input_cost_per_token": 6e-07, + "litellm_provider": "bedrock", + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "max_tokens": 262144, + "mode": "chat", + "output_cost_per_token": 3e-06, + "supports_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "source": "https://aws.amazon.com/bedrock/pricing/" + }, + "bedrock/us-west-2/qwen.qwen3-coder-next": { + "input_cost_per_token": 5e-07, + "litellm_provider": "bedrock", + "max_input_tokens": 262144, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 1.2e-06, + "supports_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "source": "https://aws.amazon.com/bedrock/pricing/" + }, + "bedrock/us.anthropic.claude-3-5-haiku-20241022-v1:0": { + "cache_creation_input_token_cost": 1e-06, + "cache_read_input_token_cost": 8e-08, + "input_cost_per_token": 8e-07, + "litellm_provider": "bedrock", + "max_input_tokens": 200000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 4e-06, + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "black_forest_labs/flux-kontext-pro": { + "litellm_provider": "black_forest_labs", + "mode": "image_edit", + "output_cost_per_image": 0.04, + "source": "https://bfl.ai/pricing", + "supported_endpoints": [ + "/v1/images/edits", + "/v1/images/generations" + ] + }, + "black_forest_labs/flux-kontext-max": { + "litellm_provider": "black_forest_labs", + 
"mode": "image_edit", + "output_cost_per_image": 0.08, + "source": "https://bfl.ai/pricing", + "supported_endpoints": [ + "/v1/images/edits", + "/v1/images/generations" + ] + }, + "black_forest_labs/flux-pro-1.0-fill": { + "litellm_provider": "black_forest_labs", + "mode": "image_edit", + "output_cost_per_image": 0.05, + "source": "https://bfl.ai/pricing", + "supported_endpoints": [ + "/v1/images/edits" + ] + }, + "black_forest_labs/flux-pro-1.0-expand": { + "litellm_provider": "black_forest_labs", + "mode": "image_edit", + "output_cost_per_image": 0.05, + "source": "https://bfl.ai/pricing", + "supported_endpoints": [ + "/v1/images/edits" + ] + }, + "black_forest_labs/flux-pro-1.1": { + "litellm_provider": "black_forest_labs", + "mode": "image_generation", + "output_cost_per_image": 0.04, + "source": "https://bfl.ai/pricing", + "supported_endpoints": [ + "/v1/images/generations" + ] + }, + "black_forest_labs/flux-pro-1.1-ultra": { + "litellm_provider": "black_forest_labs", + "mode": "image_generation", + "output_cost_per_image": 0.06, + "source": "https://bfl.ai/pricing", + "supported_endpoints": [ + "/v1/images/generations" + ] + }, + "black_forest_labs/flux-dev": { + "litellm_provider": "black_forest_labs", + "mode": "image_generation", + "output_cost_per_image": 0.025, + "source": "https://bfl.ai/pricing", + "supported_endpoints": [ + "/v1/images/generations" + ] + }, + "black_forest_labs/flux-pro": { + "litellm_provider": "black_forest_labs", + "mode": "image_generation", + "output_cost_per_image": 0.05, + "source": "https://bfl.ai/pricing", + "supported_endpoints": [ + "/v1/images/generations" + ] + }, + "cerebras/llama-3.3-70b": { + "input_cost_per_token": 8.5e-07, + "litellm_provider": "cerebras", + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 1.2e-06, + "supports_function_calling": true, + "supports_tool_choice": true + }, + "cerebras/llama3.1-70b": { + 
"input_cost_per_token": 6e-07, + "litellm_provider": "cerebras", + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 6e-07, + "supports_function_calling": true, + "supports_tool_choice": true + }, + "cerebras/llama3.1-8b": { + "input_cost_per_token": 1e-07, + "litellm_provider": "cerebras", + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 1e-07, + "supports_function_calling": true, + "supports_tool_choice": true + }, + "cerebras/gpt-oss-120b": { + "input_cost_per_token": 3.5e-07, + "litellm_provider": "cerebras", + "max_input_tokens": 131072, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 7.5e-07, + "source": "https://www.cerebras.ai/blog/openai-gpt-oss-120b-runs-fastest-on-cerebras", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "cerebras/qwen-3-32b": { + "input_cost_per_token": 4e-07, + "litellm_provider": "cerebras", + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 8e-07, + "source": "https://inference-docs.cerebras.ai/support/pricing", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, + "cerebras/zai-glm-4.6": { + "deprecation_date": "2026-01-20", + "input_cost_per_token": 2.25e-06, + "litellm_provider": "cerebras", + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 2.75e-06, + "source": "https://www.cerebras.ai/pricing", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, + "cerebras/zai-glm-4.7": { + "input_cost_per_token": 2.25e-06, + "litellm_provider": 
"cerebras", + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 2.75e-06, + "source": "https://www.cerebras.ai/pricing", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, + "chatdolphin": { + "input_cost_per_token": 5e-07, + "litellm_provider": "nlp_cloud", + "max_input_tokens": 16384, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 5e-07 + }, + "chatgpt-4o-latest": { + "input_cost_per_token": 5e-06, + "litellm_provider": "openai", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 1.5e-05, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "gpt-4o-transcribe-diarize": { + "input_cost_per_audio_token": 6e-06, + "input_cost_per_token": 2.5e-06, + "litellm_provider": "openai", + "max_input_tokens": 16000, + "max_output_tokens": 2000, + "mode": "audio_transcription", + "output_cost_per_token": 1e-05, + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] + }, + "claude-haiku-4-5-20251001": { + "cache_creation_input_token_cost": 1.25e-06, + "cache_creation_input_token_cost_above_1hr": 2e-06, + "cache_read_input_token_cost": 1e-07, + "input_cost_per_token": 1e-06, + "litellm_provider": "anthropic", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 5e-06, + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_computer_use": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": 
true + }, + "claude-haiku-4-5": { + "cache_creation_input_token_cost": 1.25e-06, + "cache_creation_input_token_cost_above_1hr": 2e-06, + "cache_read_input_token_cost": 1e-07, + "input_cost_per_token": 1e-06, + "litellm_provider": "anthropic", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 5e-06, + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_computer_use": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "claude-3-7-sonnet-20250219": { + "cache_creation_input_token_cost": 3.75e-06, + "cache_creation_input_token_cost_above_1hr": 6e-06, + "cache_read_input_token_cost": 3e-07, + "deprecation_date": "2026-02-19", + "input_cost_per_token": 3e-06, + "litellm_provider": "anthropic", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 1.5e-05, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_web_search": true, + "tool_use_system_prompt_tokens": 159 + }, + "claude-3-haiku-20240307": { + "cache_creation_input_token_cost": 3e-07, + "cache_creation_input_token_cost_above_1hr": 6e-06, + "cache_read_input_token_cost": 3e-08, + "input_cost_per_token": 2.5e-07, + "litellm_provider": "anthropic", + "max_input_tokens": 200000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 1.25e-06, + 
"supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 264 + }, + "claude-3-opus-20240229": { + "cache_creation_input_token_cost": 1.875e-05, + "cache_creation_input_token_cost_above_1hr": 6e-06, + "cache_read_input_token_cost": 1.5e-06, + "deprecation_date": "2026-05-01", + "input_cost_per_token": 1.5e-05, + "litellm_provider": "anthropic", + "max_input_tokens": 200000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 7.5e-05, + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 395 + }, + "claude-4-opus-20250514": { + "cache_creation_input_token_cost": 1.875e-05, + "cache_read_input_token_cost": 1.5e-06, + "input_cost_per_token": 1.5e-05, + "litellm_provider": "anthropic", + "max_input_tokens": 200000, + "max_output_tokens": 32000, + "max_tokens": 32000, + "mode": "chat", + "output_cost_per_token": 7.5e-05, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 159 + }, + "claude-4-sonnet-20250514": { + "cache_creation_input_token_cost": 3.75e-06, + "cache_creation_input_token_cost_above_200k_tokens": 7.5e-06, + "cache_read_input_token_cost": 3e-07, + "cache_read_input_token_cost_above_200k_tokens": 6e-07, + "input_cost_per_token": 
3e-06, + "input_cost_per_token_above_200k_tokens": 6e-06, + "litellm_provider": "anthropic", + "max_input_tokens": 1000000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 1.5e-05, + "output_cost_per_token_above_200k_tokens": 2.25e-05, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_web_search": true, + "tool_use_system_prompt_tokens": 159 + }, + "claude-sonnet-4-5": { + "cache_creation_input_token_cost": 3.75e-06, + "cache_read_input_token_cost": 3e-07, + "input_cost_per_token": 3e-06, + "input_cost_per_token_above_200k_tokens": 6e-06, + "output_cost_per_token_above_200k_tokens": 2.25e-05, + "cache_creation_input_token_cost_above_200k_tokens": 7.5e-06, + "cache_read_input_token_cost_above_200k_tokens": 6e-07, + "litellm_provider": "anthropic", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 1.5e-05, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 346 + }, + "claude-sonnet-4-5-20250929": { + "cache_creation_input_token_cost": 3.75e-06, + "cache_read_input_token_cost": 3e-07, + "input_cost_per_token": 3e-06, 
+ "input_cost_per_token_above_200k_tokens": 6e-06, + "output_cost_per_token_above_200k_tokens": 2.25e-05, + "cache_creation_input_token_cost_above_200k_tokens": 7.5e-06, + "cache_read_input_token_cost_above_200k_tokens": 6e-07, + "litellm_provider": "anthropic", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 1.5e-05, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_web_search": true, + "tool_use_system_prompt_tokens": 346 + }, + "claude-sonnet-4-6": { + "cache_creation_input_token_cost": 3.75e-06, + "cache_read_input_token_cost": 3e-07, + "input_cost_per_token": 3e-06, + "litellm_provider": "anthropic", + "max_input_tokens": 1000000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 1.5e-05, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_adaptive_thinking": true, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_max_reasoning_effort": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 346, + "supports_minimal_reasoning_effort": true + }, + "claude-sonnet-4-5-20250929-v1:0": { + "cache_creation_input_token_cost": 3.75e-06, + "cache_read_input_token_cost": 3e-07, + 
"input_cost_per_token": 3e-06, + "input_cost_per_token_above_200k_tokens": 6e-06, + "output_cost_per_token_above_200k_tokens": 2.25e-05, + "cache_creation_input_token_cost_above_200k_tokens": 7.5e-06, + "cache_read_input_token_cost_above_200k_tokens": 6e-07, + "litellm_provider": "bedrock", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 1.5e-05, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 159 + }, + "claude-opus-4-1": { + "cache_creation_input_token_cost": 1.875e-05, + "cache_creation_input_token_cost_above_1hr": 3e-05, + "cache_read_input_token_cost": 1.5e-06, + "input_cost_per_token": 1.5e-05, + "litellm_provider": "anthropic", + "max_input_tokens": 200000, + "max_output_tokens": 32000, + "max_tokens": 32000, + "mode": "chat", + "output_cost_per_token": 7.5e-05, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 159 + }, + "claude-opus-4-1-20250805": { + "cache_creation_input_token_cost": 1.875e-05, + "cache_creation_input_token_cost_above_1hr": 3e-05, + "cache_read_input_token_cost": 1.5e-06, + "input_cost_per_token": 1.5e-05, + "deprecation_date": "2026-08-05", + "litellm_provider": "anthropic", + "max_input_tokens": 200000, + "max_output_tokens": 32000, + "max_tokens": 
32000, + "mode": "chat", + "output_cost_per_token": 7.5e-05, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 159 + }, + "claude-opus-4-20250514": { + "cache_creation_input_token_cost": 1.875e-05, + "cache_creation_input_token_cost_above_1hr": 3e-05, + "cache_read_input_token_cost": 1.5e-06, + "input_cost_per_token": 1.5e-05, + "deprecation_date": "2026-05-14", + "litellm_provider": "anthropic", + "max_input_tokens": 200000, + "max_output_tokens": 32000, + "max_tokens": 32000, + "mode": "chat", + "output_cost_per_token": 7.5e-05, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 159 + }, + "claude-opus-4-5-20251101": { + "cache_creation_input_token_cost": 6.25e-06, + "cache_creation_input_token_cost_above_1hr": 1e-05, + "cache_read_input_token_cost": 5e-07, + "input_cost_per_token": 5e-06, + "litellm_provider": "anthropic", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 2.5e-05, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + 
"supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_minimal_reasoning_effort": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 159 + }, + "claude-opus-4-5": { + "cache_creation_input_token_cost": 6.25e-06, + "cache_creation_input_token_cost_above_1hr": 1e-05, + "cache_read_input_token_cost": 5e-07, + "input_cost_per_token": 5e-06, + "litellm_provider": "anthropic", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 2.5e-05, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_minimal_reasoning_effort": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 159 + }, + "claude-opus-4-6": { + "cache_creation_input_token_cost": 6.25e-06, + "cache_creation_input_token_cost_above_1hr": 1e-05, + "cache_read_input_token_cost": 5e-07, + "input_cost_per_token": 5e-06, + "litellm_provider": "anthropic", + "max_input_tokens": 1000000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 2.5e-05, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_adaptive_thinking": true, + "supports_assistant_prefill": false, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + 
"supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 346, + "provider_specific_entry": { + "us": 1.1, + "fast": 6.0 + }, + "supports_max_reasoning_effort": true, + "supports_minimal_reasoning_effort": true + }, + "claude-opus-4-6-20260205": { + "cache_creation_input_token_cost": 6.25e-06, + "cache_creation_input_token_cost_above_1hr": 1e-05, + "cache_read_input_token_cost": 5e-07, + "input_cost_per_token": 5e-06, + "litellm_provider": "anthropic", + "max_input_tokens": 1000000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 2.5e-05, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_adaptive_thinking": true, + "supports_assistant_prefill": false, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 346, + "provider_specific_entry": { + "us": 1.1, + "fast": 6.0 + }, + "supports_max_reasoning_effort": true, + "supports_minimal_reasoning_effort": true + }, + "claude-opus-4-7": { + "cache_creation_input_token_cost": 6.25e-06, + "cache_creation_input_token_cost_above_1hr": 1e-05, + "cache_read_input_token_cost": 5e-07, + "input_cost_per_token": 5e-06, + "litellm_provider": "anthropic", + "max_input_tokens": 1000000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 2.5e-05, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_adaptive_thinking": true, + 
"supports_assistant_prefill": false, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_xhigh_reasoning_effort": true, + "supports_max_reasoning_effort": true, + "tool_use_system_prompt_tokens": 346, + "provider_specific_entry": { + "us": 1.1, + "fast": 6.0 + }, + "supports_minimal_reasoning_effort": true + }, + "claude-opus-4-7-20260416": { + "cache_creation_input_token_cost": 6.25e-06, + "cache_creation_input_token_cost_above_1hr": 1e-05, + "cache_read_input_token_cost": 5e-07, + "input_cost_per_token": 5e-06, + "litellm_provider": "anthropic", + "max_input_tokens": 1000000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 2.5e-05, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_adaptive_thinking": true, + "supports_assistant_prefill": false, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_xhigh_reasoning_effort": true, + "supports_max_reasoning_effort": true, + "tool_use_system_prompt_tokens": 346, + "provider_specific_entry": { + "us": 1.1, + "fast": 6.0 + }, + "supports_minimal_reasoning_effort": true + }, + "claude-sonnet-4-20250514": { + "deprecation_date": "2026-05-14", + "cache_creation_input_token_cost": 3.75e-06, + "cache_creation_input_token_cost_above_1hr": 6e-06, + "cache_read_input_token_cost": 3e-07, + "input_cost_per_token": 3e-06, + "input_cost_per_token_above_200k_tokens": 6e-06, + "output_cost_per_token_above_200k_tokens": 2.25e-05, + 
"cache_creation_input_token_cost_above_200k_tokens": 7.5e-06, + "cache_read_input_token_cost_above_200k_tokens": 6e-07, + "litellm_provider": "anthropic", + "max_input_tokens": 1000000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 1.5e-05, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 159 + }, + "cloudflare/@cf/meta/llama-2-7b-chat-fp16": { + "input_cost_per_token": 1.923e-06, + "litellm_provider": "cloudflare", + "max_input_tokens": 3072, + "max_output_tokens": 3072, + "max_tokens": 3072, + "mode": "chat", + "output_cost_per_token": 1.923e-06 + }, + "cloudflare/@cf/meta/llama-2-7b-chat-int8": { + "input_cost_per_token": 1.923e-06, + "litellm_provider": "cloudflare", + "max_input_tokens": 2048, + "max_output_tokens": 2048, + "max_tokens": 2048, + "mode": "chat", + "output_cost_per_token": 1.923e-06 + }, + "cloudflare/@cf/mistral/mistral-7b-instruct-v0.1": { + "input_cost_per_token": 1.923e-06, + "litellm_provider": "cloudflare", + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 1.923e-06 + }, + "cloudflare/@hf/thebloke/codellama-7b-instruct-awq": { + "input_cost_per_token": 1.923e-06, + "litellm_provider": "cloudflare", + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 1.923e-06 + }, + "codestral/codestral-2405": { + "input_cost_per_token": 0.0, + "litellm_provider": "codestral", + "max_input_tokens": 32000, + "max_output_tokens": 8191, + 
"max_tokens": 8191, + "mode": "chat", + "output_cost_per_token": 0.0, + "source": "https://docs.mistral.ai/capabilities/code_generation/", + "supports_assistant_prefill": true, + "supports_tool_choice": true + }, + "codestral/codestral-latest": { + "input_cost_per_token": 0.0, + "litellm_provider": "codestral", + "max_input_tokens": 32000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_token": 0.0, + "source": "https://docs.mistral.ai/capabilities/code_generation/", + "supports_assistant_prefill": true, + "supports_tool_choice": true + }, + "codex-mini-latest": { + "cache_read_input_token_cost": 3.75e-07, + "input_cost_per_token": 1.5e-06, + "litellm_provider": "openai", + "max_input_tokens": 200000, + "max_output_tokens": 100000, + "max_tokens": 100000, + "mode": "responses", + "output_cost_per_token": 6e-06, + "supported_endpoints": [ + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "cohere.command-light-text-v14": { + "input_cost_per_token": 3e-07, + "litellm_provider": "bedrock", + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 6e-07, + "supports_tool_choice": true + }, + "cohere.command-r-plus-v1:0": { + "input_cost_per_token": 3e-06, + "litellm_provider": "bedrock", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 1.5e-05, + "supports_tool_choice": true + }, + "cohere.command-r-v1:0": { + "input_cost_per_token": 5e-07, + "litellm_provider": "bedrock", + "max_input_tokens": 128000, + 
"max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 1.5e-06, + "supports_tool_choice": true + }, + "cohere.command-text-v14": { + "input_cost_per_token": 1.5e-06, + "litellm_provider": "bedrock", + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 2e-06, + "supports_tool_choice": true + }, + "cohere.embed-english-v3": { + "input_cost_per_token": 1e-07, + "litellm_provider": "bedrock", + "max_input_tokens": 512, + "max_tokens": 512, + "mode": "embedding", + "output_cost_per_token": 0.0, + "supports_embedding_image_input": true + }, + "cohere.embed-multilingual-v3": { + "input_cost_per_token": 1e-07, + "litellm_provider": "bedrock", + "max_input_tokens": 512, + "max_tokens": 512, + "mode": "embedding", + "output_cost_per_token": 0.0, + "supports_embedding_image_input": true + }, + "cohere.embed-v4:0": { + "input_cost_per_token": 1.2e-07, + "litellm_provider": "bedrock", + "max_input_tokens": 128000, + "max_tokens": 128000, + "mode": "embedding", + "output_cost_per_token": 0.0, + "output_vector_size": 1536, + "supports_embedding_image_input": true + }, + "cohere/embed-v4.0": { + "input_cost_per_token": 1.2e-07, + "litellm_provider": "cohere", + "max_input_tokens": 128000, + "max_tokens": 128000, + "mode": "embedding", + "output_cost_per_token": 0.0, + "output_vector_size": 1536, + "supports_embedding_image_input": true + }, + "cohere.rerank-v3-5:0": { + "input_cost_per_query": 0.002, + "input_cost_per_token": 0.0, + "litellm_provider": "bedrock", + "max_document_chunks_per_query": 100, + "max_input_tokens": 32000, + "max_output_tokens": 32000, + "max_query_tokens": 32000, + "max_tokens": 32000, + "max_tokens_per_document_chunk": 512, + "mode": "rerank", + "output_cost_per_token": 0.0 + }, + "command": { + "input_cost_per_token": 1e-06, + "litellm_provider": "cohere", + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": 
"completion", + "output_cost_per_token": 2e-06 + }, + "command-a-03-2025": { + "input_cost_per_token": 2.5e-06, + "litellm_provider": "cohere_chat", + "max_input_tokens": 256000, + "max_output_tokens": 8000, + "max_tokens": 8000, + "mode": "chat", + "output_cost_per_token": 1e-05, + "supports_function_calling": true, + "supports_tool_choice": true + }, + "command-light": { + "input_cost_per_token": 3e-07, + "litellm_provider": "cohere_chat", + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 6e-07, + "supports_tool_choice": true + }, + "command-nightly": { + "input_cost_per_token": 1e-06, + "litellm_provider": "cohere", + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "completion", + "output_cost_per_token": 2e-06 + }, + "command-r": { + "input_cost_per_token": 1.5e-07, + "litellm_provider": "cohere_chat", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 6e-07, + "supports_function_calling": true, + "supports_tool_choice": true + }, + "command-r-08-2024": { + "input_cost_per_token": 1.5e-07, + "litellm_provider": "cohere_chat", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 6e-07, + "supports_function_calling": true, + "supports_tool_choice": true + }, + "command-r-plus": { + "input_cost_per_token": 2.5e-06, + "litellm_provider": "cohere_chat", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 1e-05, + "supports_function_calling": true, + "supports_tool_choice": true + }, + "command-r-plus-08-2024": { + "input_cost_per_token": 2.5e-06, + "litellm_provider": "cohere_chat", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 1e-05, + 
"supports_function_calling": true, + "supports_tool_choice": true + }, + "command-r7b-12-2024": { + "input_cost_per_token": 1.5e-07, + "litellm_provider": "cohere_chat", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 3.75e-08, + "source": "https://docs.cohere.com/v2/docs/command-r7b", + "supports_function_calling": true, + "supports_tool_choice": true + }, + "computer-use-preview": { + "input_cost_per_token": 3e-06, + "litellm_provider": "azure", + "max_input_tokens": 8192, + "max_output_tokens": 1024, + "max_tokens": 1024, + "mode": "chat", + "output_cost_per_token": 1.2e-05, + "supported_endpoints": [ + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": false, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "dall-e-2": { + "input_cost_per_image": 0.02, + "litellm_provider": "openai", + "mode": "image_generation", + "supported_endpoints": [ + "/v1/images/generations", + "/v1/images/edits", + "/v1/images/variations" + ] + }, + "dall-e-3": { + "input_cost_per_image": 0.04, + "litellm_provider": "openai", + "mode": "image_generation", + "supported_endpoints": [ + "/v1/images/generations" + ] + }, + "deepseek-chat": { + "cache_read_input_token_cost": 2.8e-08, + "input_cost_per_token": 2.8e-07, + "litellm_provider": "deepseek", + "max_input_tokens": 131072, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 4.2e-07, + "source": "https://api-docs.deepseek.com/quick_start/pricing", + "supported_endpoints": [ + "/v1/chat/completions" + ], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": 
true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "deepseek-reasoner": { + "cache_read_input_token_cost": 2.8e-08, + "input_cost_per_token": 2.8e-07, + "litellm_provider": "deepseek", + "max_input_tokens": 131072, + "max_output_tokens": 65536, + "max_tokens": 65536, + "mode": "chat", + "output_cost_per_token": 4.2e-07, + "source": "https://api-docs.deepseek.com/quick_start/pricing", + "supported_endpoints": [ + "/v1/chat/completions" + ], + "supports_function_calling": false, + "supports_native_streaming": true, + "supports_parallel_function_calling": false, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": false + }, + "dashscope/qwen-coder": { + "input_cost_per_token": 3e-07, + "litellm_provider": "dashscope", + "max_input_tokens": 1000000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 1.5e-06, + "source": "https://www.alibabacloud.com/help/en/model-studio/models", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, + "dashscope/qwen-flash": { + "litellm_provider": "dashscope", + "max_input_tokens": 997952, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "source": "https://www.alibabacloud.com/help/en/model-studio/models", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true, + "tiered_pricing": [ + { + "input_cost_per_token": 5e-08, + "output_cost_per_token": 4e-07, + "range": [ + 0, + 256000.0 + ] + }, + { + "input_cost_per_token": 2.5e-07, + "output_cost_per_token": 2e-06, + "range": [ + 256000.0, + 1000000.0 + ] + } + ] + }, + "dashscope/qwen-flash-2025-07-28": { + "litellm_provider": "dashscope", + "max_input_tokens": 997952, + "max_output_tokens": 32768, + "max_tokens": 32768, 
+ "mode": "chat", + "source": "https://www.alibabacloud.com/help/en/model-studio/models", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true, + "tiered_pricing": [ + { + "input_cost_per_token": 5e-08, + "output_cost_per_token": 4e-07, + "range": [ + 0, + 256000.0 + ] + }, + { + "input_cost_per_token": 2.5e-07, + "output_cost_per_token": 2e-06, + "range": [ + 256000.0, + 1000000.0 + ] + } + ] + }, + "dashscope/qwen-max": { + "input_cost_per_token": 1.6e-06, + "litellm_provider": "dashscope", + "max_input_tokens": 30720, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 6.4e-06, + "source": "https://www.alibabacloud.com/help/en/model-studio/models", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, + "dashscope/qwen-plus": { + "input_cost_per_token": 4e-07, + "litellm_provider": "dashscope", + "max_input_tokens": 129024, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 1.2e-06, + "source": "https://www.alibabacloud.com/help/en/model-studio/models", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, + "dashscope/qwen-plus-2025-01-25": { + "input_cost_per_token": 4e-07, + "litellm_provider": "dashscope", + "max_input_tokens": 129024, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 1.2e-06, + "source": "https://www.alibabacloud.com/help/en/model-studio/models", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, + "dashscope/qwen-plus-2025-04-28": { + "input_cost_per_token": 4e-07, + "litellm_provider": "dashscope", + "max_input_tokens": 129024, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_reasoning_token": 4e-06, + "output_cost_per_token": 1.2e-06, + "source": 
"https://www.alibabacloud.com/help/en/model-studio/models", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, + "dashscope/qwen-plus-2025-07-14": { + "input_cost_per_token": 4e-07, + "litellm_provider": "dashscope", + "max_input_tokens": 129024, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_reasoning_token": 4e-06, + "output_cost_per_token": 1.2e-06, + "source": "https://www.alibabacloud.com/help/en/model-studio/models", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, + "dashscope/qwen-plus-2025-07-28": { + "litellm_provider": "dashscope", + "max_input_tokens": 997952, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "source": "https://www.alibabacloud.com/help/en/model-studio/models", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true, + "tiered_pricing": [ + { + "input_cost_per_token": 4e-07, + "output_cost_per_reasoning_token": 4e-06, + "output_cost_per_token": 1.2e-06, + "range": [ + 0, + 256000.0 + ] + }, + { + "input_cost_per_token": 1.2e-06, + "output_cost_per_reasoning_token": 1.2e-05, + "output_cost_per_token": 3.6e-06, + "range": [ + 256000.0, + 1000000.0 + ] + } + ] + }, + "dashscope/qwen-plus-2025-09-11": { + "litellm_provider": "dashscope", + "max_input_tokens": 997952, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "source": "https://www.alibabacloud.com/help/en/model-studio/models", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true, + "tiered_pricing": [ + { + "input_cost_per_token": 4e-07, + "output_cost_per_reasoning_token": 4e-06, + "output_cost_per_token": 1.2e-06, + "range": [ + 0, + 256000.0 + ] + }, + { + "input_cost_per_token": 1.2e-06, + "output_cost_per_reasoning_token": 1.2e-05, + "output_cost_per_token": 3.6e-06, + "range": [ + 256000.0, + 
1000000.0 + ] + } + ] + }, + "dashscope/qwen-plus-latest": { + "litellm_provider": "dashscope", + "max_input_tokens": 997952, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "source": "https://www.alibabacloud.com/help/en/model-studio/models", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true, + "tiered_pricing": [ + { + "input_cost_per_token": 4e-07, + "output_cost_per_reasoning_token": 4e-06, + "output_cost_per_token": 1.2e-06, + "range": [ + 0, + 256000.0 + ] + }, + { + "input_cost_per_token": 1.2e-06, + "output_cost_per_reasoning_token": 1.2e-05, + "output_cost_per_token": 3.6e-06, + "range": [ + 256000.0, + 1000000.0 + ] + } + ] + }, + "dashscope/qwen-turbo": { + "input_cost_per_token": 5e-08, + "litellm_provider": "dashscope", + "max_input_tokens": 129024, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_reasoning_token": 5e-07, + "output_cost_per_token": 2e-07, + "source": "https://www.alibabacloud.com/help/en/model-studio/models", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, + "dashscope/qwen-turbo-2024-11-01": { + "input_cost_per_token": 5e-08, + "litellm_provider": "dashscope", + "max_input_tokens": 1000000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 2e-07, + "source": "https://www.alibabacloud.com/help/en/model-studio/models", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, + "dashscope/qwen-turbo-2025-04-28": { + "input_cost_per_token": 5e-08, + "litellm_provider": "dashscope", + "max_input_tokens": 1000000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_reasoning_token": 5e-07, + "output_cost_per_token": 2e-07, + "source": "https://www.alibabacloud.com/help/en/model-studio/models", + "supports_function_calling": true, + 
"supports_reasoning": true, + "supports_tool_choice": true + }, + "dashscope/qwen-turbo-latest": { + "input_cost_per_token": 5e-08, + "litellm_provider": "dashscope", + "max_input_tokens": 1000000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_reasoning_token": 5e-07, + "output_cost_per_token": 2e-07, + "source": "https://www.alibabacloud.com/help/en/model-studio/models", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, + "dashscope/qwen3-30b-a3b": { + "litellm_provider": "dashscope", + "max_input_tokens": 129024, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "source": "https://www.alibabacloud.com/help/en/model-studio/models", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, + "dashscope/qwen3-coder-flash": { + "litellm_provider": "dashscope", + "max_input_tokens": 997952, + "max_output_tokens": 65536, + "max_tokens": 65536, + "mode": "chat", + "source": "https://www.alibabacloud.com/help/en/model-studio/models", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true, + "tiered_pricing": [ + { + "cache_read_input_token_cost": 8e-08, + "input_cost_per_token": 3e-07, + "output_cost_per_token": 1.5e-06, + "range": [ + 0, + 32000.0 + ] + }, + { + "cache_read_input_token_cost": 1.2e-07, + "input_cost_per_token": 5e-07, + "output_cost_per_token": 2.5e-06, + "range": [ + 32000.0, + 128000.0 + ] + }, + { + "cache_read_input_token_cost": 2e-07, + "input_cost_per_token": 8e-07, + "output_cost_per_token": 4e-06, + "range": [ + 128000.0, + 256000.0 + ] + }, + { + "cache_read_input_token_cost": 4e-07, + "input_cost_per_token": 1.6e-06, + "output_cost_per_token": 9.6e-06, + "range": [ + 256000.0, + 1000000.0 + ] + } + ] + }, + "dashscope/qwen3-coder-flash-2025-07-28": { + "litellm_provider": "dashscope", + "max_input_tokens": 997952, + "max_output_tokens": 
65536, + "max_tokens": 65536, + "mode": "chat", + "source": "https://www.alibabacloud.com/help/en/model-studio/models", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true, + "tiered_pricing": [ + { + "input_cost_per_token": 3e-07, + "output_cost_per_token": 1.5e-06, + "range": [ + 0, + 32000.0 + ] + }, + { + "input_cost_per_token": 5e-07, + "output_cost_per_token": 2.5e-06, + "range": [ + 32000.0, + 128000.0 + ] + }, + { + "input_cost_per_token": 8e-07, + "output_cost_per_token": 4e-06, + "range": [ + 128000.0, + 256000.0 + ] + }, + { + "input_cost_per_token": 1.6e-06, + "output_cost_per_token": 9.6e-06, + "range": [ + 256000.0, + 1000000.0 + ] + } + ] + }, + "dashscope/qwen3-coder-plus": { + "litellm_provider": "dashscope", + "max_input_tokens": 997952, + "max_output_tokens": 65536, + "max_tokens": 65536, + "mode": "chat", + "source": "https://www.alibabacloud.com/help/en/model-studio/models", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true, + "tiered_pricing": [ + { + "cache_read_input_token_cost": 1e-07, + "input_cost_per_token": 1e-06, + "output_cost_per_token": 5e-06, + "range": [ + 0, + 32000.0 + ] + }, + { + "cache_read_input_token_cost": 1.8e-07, + "input_cost_per_token": 1.8e-06, + "output_cost_per_token": 9e-06, + "range": [ + 32000.0, + 128000.0 + ] + }, + { + "cache_read_input_token_cost": 3e-07, + "input_cost_per_token": 3e-06, + "output_cost_per_token": 1.5e-05, + "range": [ + 128000.0, + 256000.0 + ] + }, + { + "cache_read_input_token_cost": 6e-07, + "input_cost_per_token": 6e-06, + "output_cost_per_token": 6e-05, + "range": [ + 256000.0, + 1000000.0 + ] + } + ] + }, + "dashscope/qwen3-coder-plus-2025-07-22": { + "litellm_provider": "dashscope", + "max_input_tokens": 997952, + "max_output_tokens": 65536, + "max_tokens": 65536, + "mode": "chat", + "source": "https://www.alibabacloud.com/help/en/model-studio/models", + "supports_function_calling": true, + 
"supports_reasoning": true, + "supports_tool_choice": true, + "tiered_pricing": [ + { + "input_cost_per_token": 1e-06, + "output_cost_per_token": 5e-06, + "range": [ + 0, + 32000.0 + ] + }, + { + "input_cost_per_token": 1.8e-06, + "output_cost_per_token": 9e-06, + "range": [ + 32000.0, + 128000.0 + ] + }, + { + "input_cost_per_token": 3e-06, + "output_cost_per_token": 1.5e-05, + "range": [ + 128000.0, + 256000.0 + ] + }, + { + "input_cost_per_token": 6e-06, + "output_cost_per_token": 6e-05, + "range": [ + 256000.0, + 1000000.0 + ] + } + ] + }, + "dashscope/qwen3-max-preview": { + "litellm_provider": "dashscope", + "max_input_tokens": 258048, + "max_output_tokens": 65536, + "max_tokens": 65536, + "mode": "chat", + "source": "https://www.alibabacloud.com/help/en/model-studio/models", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true, + "tiered_pricing": [ + { + "input_cost_per_token": 1.2e-06, + "output_cost_per_token": 6e-06, + "range": [ + 0, + 32000.0 + ] + }, + { + "input_cost_per_token": 2.4e-06, + "output_cost_per_token": 1.2e-05, + "range": [ + 32000.0, + 128000.0 + ] + }, + { + "input_cost_per_token": 3e-06, + "output_cost_per_token": 1.5e-05, + "range": [ + 128000.0, + 252000.0 + ] + } + ] + }, + "dashscope/qwen3-max": { + "litellm_provider": "dashscope", + "max_input_tokens": 258048, + "max_output_tokens": 65536, + "max_tokens": 65536, + "mode": "chat", + "source": "https://www.alibabacloud.com/help/en/model-studio/models", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true, + "tiered_pricing": [ + { + "input_cost_per_token": 1.2e-06, + "output_cost_per_token": 6e-06, + "range": [ + 0, + 32000.0 + ] + }, + { + "input_cost_per_token": 2.4e-06, + "output_cost_per_token": 1.2e-05, + "range": [ + 32000.0, + 128000.0 + ] + }, + { + "input_cost_per_token": 3e-06, + "output_cost_per_token": 1.5e-05, + "range": [ + 128000.0, + 252000.0 + ] + } + ] + }, + 
"dashscope/qwen3-max-2026-01-23": { + "litellm_provider": "dashscope", + "max_input_tokens": 258048, + "max_output_tokens": 65536, + "max_tokens": 65536, + "mode": "chat", + "source": "https://www.alibabacloud.com/help/en/model-studio/models", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true, + "tiered_pricing": [ + { + "input_cost_per_token": 1.2e-06, + "output_cost_per_token": 6e-06, + "range": [ + 0, + 32000.0 + ] + }, + { + "input_cost_per_token": 2.4e-06, + "output_cost_per_token": 1.2e-05, + "range": [ + 32000.0, + 128000.0 + ] + }, + { + "input_cost_per_token": 3e-06, + "output_cost_per_token": 1.5e-05, + "range": [ + 128000.0, + 252000.0 + ] + } + ] + }, + "dashscope/qwen3-next-80b-a3b-instruct": { + "input_cost_per_token": 1.5e-07, + "litellm_provider": "dashscope", + "max_input_tokens": 262144, + "max_output_tokens": 65536, + "max_tokens": 65536, + "mode": "chat", + "output_cost_per_token": 1.2e-06, + "source": "https://www.alibabacloud.com/help/en/model-studio/model-pricing", + "supports_function_calling": true, + "supports_tool_choice": true + }, + "dashscope/qwen3-next-80b-a3b-thinking": { + "input_cost_per_token": 1.5e-07, + "litellm_provider": "dashscope", + "max_input_tokens": 262144, + "max_output_tokens": 65536, + "max_tokens": 65536, + "mode": "chat", + "output_cost_per_token": 1.2e-06, + "source": "https://www.alibabacloud.com/help/en/model-studio/model-pricing", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, + "dashscope/qwen3-vl-235b-a22b-instruct": { + "input_cost_per_token": 4e-07, + "litellm_provider": "dashscope", + "max_input_tokens": 131072, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 1.6e-06, + "source": "https://www.alibabacloud.com/help/en/model-studio/model-pricing", + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_vision": true + }, + 
"dashscope/qwen3-vl-235b-a22b-thinking": { + "input_cost_per_token": 4e-07, + "litellm_provider": "dashscope", + "max_input_tokens": 131072, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 4e-06, + "source": "https://www.alibabacloud.com/help/en/model-studio/model-pricing", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "dashscope/qwen3-vl-32b-instruct": { + "input_cost_per_token": 1.6e-07, + "litellm_provider": "dashscope", + "max_input_tokens": 131072, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 6.4e-07, + "source": "https://www.alibabacloud.com/help/en/model-studio/model-pricing", + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "dashscope/qwen3-vl-32b-thinking": { + "input_cost_per_token": 1.6e-07, + "litellm_provider": "dashscope", + "max_input_tokens": 131072, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 2.87e-06, + "source": "https://www.alibabacloud.com/help/en/model-studio/model-pricing", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "dashscope/qwen3-vl-plus": { + "litellm_provider": "dashscope", + "max_input_tokens": 260096, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "source": "https://www.alibabacloud.com/help/en/model-studio/models", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true, + "supports_vision": true, + "tiered_pricing": [ + { + "input_cost_per_token": 2e-07, + "output_cost_per_token": 1.6e-06, + "range": [ + 0, + 32000.0 + ] + }, + { + "input_cost_per_token": 3e-07, + "output_cost_per_token": 2.4e-06, + "range": [ + 32000.0, + 128000.0 + ] + }, + { + "input_cost_per_token": 6e-07, + 
"output_cost_per_token": 4.8e-06, + "range": [ + 128000.0, + 256000.0 + ] + } + ] + }, + "dashscope/qwen3.5-plus": { + "litellm_provider": "dashscope", + "max_input_tokens": 991808, + "max_output_tokens": 65536, + "max_tokens": 65536, + "mode": "chat", + "source": "https://www.alibabacloud.com/help/en/model-studio/models", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true, + "supports_vision": true, + "tiered_pricing": [ + { + "input_cost_per_token": 4e-07, + "output_cost_per_token": 2.4e-06, + "range": [ + 0, + 256000.0 + ] + }, + { + "input_cost_per_token": 5e-07, + "output_cost_per_token": 3e-06, + "range": [ + 256000.0, + 1000000.0 + ] + } + ] + }, + "dashscope/qwq-plus": { + "input_cost_per_token": 8e-07, + "litellm_provider": "dashscope", + "max_input_tokens": 98304, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 2.4e-06, + "source": "https://www.alibabacloud.com/help/en/model-studio/models", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, + "dashscope/qwen-image-2.0": { + "litellm_provider": "dashscope", + "mode": "image_generation", + "source": "https://www.alibabacloud.com/help/en/model-studio/models", + "supported_endpoints": [ + "/v1/images/generations" + ] + }, + "dashscope/qwen-image-2.0-pro": { + "litellm_provider": "dashscope", + "mode": "image_generation", + "source": "https://www.alibabacloud.com/help/en/model-studio/models", + "supported_endpoints": [ + "/v1/images/generations" + ] + }, + "databricks/databricks-bge-large-en": { + "input_cost_per_token": 1.0003e-07, + "input_dbu_cost_per_token": 1.429e-06, + "litellm_provider": "databricks", + "max_input_tokens": 512, + "max_tokens": 512, + "metadata": { + "notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." 
+ }, + "mode": "embedding", + "output_cost_per_token": 0.0, + "output_dbu_cost_per_token": 0.0, + "output_vector_size": 1024, + "source": "https://www.databricks.com/product/pricing/foundation-model-serving" + }, + "databricks/databricks-claude-3-7-sonnet": { + "input_cost_per_token": 2.9999900000000002e-06, + "input_dbu_cost_per_token": 4.2857e-05, + "litellm_provider": "databricks", + "max_input_tokens": 200000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "metadata": { + "notes": "Input/output cost per token is dbu cost * $0.070. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." + }, + "mode": "chat", + "output_cost_per_token": 1.5000020000000002e-05, + "output_dbu_cost_per_token": 0.000214286, + "source": "https://www.databricks.com/product/pricing/proprietary-foundation-model-serving", + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, + "databricks/databricks-claude-haiku-4-5": { + "input_cost_per_token": 1.00002e-06, + "input_dbu_cost_per_token": 1.4286e-05, + "litellm_provider": "databricks", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "metadata": { + "notes": "Input/output cost per token is dbu cost * $0.070. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." 
+ }, + "mode": "chat", + "output_cost_per_token": 5.00003e-06, + "output_dbu_cost_per_token": 7.1429e-05, + "source": "https://www.databricks.com/product/pricing/proprietary-foundation-model-serving", + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, + "databricks/databricks-claude-opus-4": { + "input_cost_per_token": 1.5000020000000002e-05, + "input_dbu_cost_per_token": 0.000214286, + "litellm_provider": "databricks", + "max_input_tokens": 200000, + "max_output_tokens": 32000, + "max_tokens": 32000, + "metadata": { + "notes": "Input/output cost per token is dbu cost * $0.070. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." + }, + "mode": "chat", + "output_cost_per_token": 7.500003000000001e-05, + "output_dbu_cost_per_token": 0.001071429, + "source": "https://www.databricks.com/product/pricing/proprietary-foundation-model-serving", + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, + "databricks/databricks-claude-opus-4-1": { + "input_cost_per_token": 1.5000020000000002e-05, + "input_dbu_cost_per_token": 0.000214286, + "litellm_provider": "databricks", + "max_input_tokens": 200000, + "max_output_tokens": 32000, + "max_tokens": 32000, + "metadata": { + "notes": "Input/output cost per token is dbu cost * $0.070. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." 
+ }, + "mode": "chat", + "output_cost_per_token": 7.500003000000001e-05, + "output_dbu_cost_per_token": 0.001071429, + "source": "https://www.databricks.com/product/pricing/proprietary-foundation-model-serving", + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, + "databricks/databricks-claude-opus-4-5": { + "input_cost_per_token": 5.00003e-06, + "input_dbu_cost_per_token": 7.1429e-05, + "litellm_provider": "databricks", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "metadata": { + "notes": "Input/output cost per token is dbu cost * $0.070. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." + }, + "mode": "chat", + "output_cost_per_token": 2.5000010000000002e-05, + "output_dbu_cost_per_token": 0.000357143, + "source": "https://www.databricks.com/product/pricing/proprietary-foundation-model-serving", + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_reasoning": true, + "supports_minimal_reasoning_effort": true, + "supports_tool_choice": true + }, + "databricks/databricks-claude-sonnet-4": { + "input_cost_per_token": 2.9999900000000002e-06, + "input_dbu_cost_per_token": 4.2857e-05, + "litellm_provider": "databricks", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "metadata": { + "notes": "Input/output cost per token is dbu cost * $0.070. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." 
+ }, + "mode": "chat", + "output_cost_per_token": 1.5000020000000002e-05, + "output_dbu_cost_per_token": 0.000214286, + "source": "https://www.databricks.com/product/pricing/proprietary-foundation-model-serving", + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, + "databricks/databricks-claude-sonnet-4-1": { + "input_cost_per_token": 2.9999900000000002e-06, + "input_dbu_cost_per_token": 4.2857e-05, + "litellm_provider": "databricks", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "metadata": { + "notes": "Input/output cost per token is dbu cost * $0.070. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." + }, + "mode": "chat", + "output_cost_per_token": 1.5000020000000002e-05, + "output_dbu_cost_per_token": 0.000214286, + "source": "https://www.databricks.com/product/pricing/proprietary-foundation-model-serving", + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, + "databricks/databricks-claude-sonnet-4-5": { + "input_cost_per_token": 2.9999900000000002e-06, + "input_dbu_cost_per_token": 4.2857e-05, + "litellm_provider": "databricks", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "metadata": { + "notes": "Input/output cost per token is dbu cost * $0.070. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." 
+ }, + "mode": "chat", + "output_cost_per_token": 1.5000020000000002e-05, + "output_dbu_cost_per_token": 0.000214286, + "source": "https://www.databricks.com/product/pricing/proprietary-foundation-model-serving", + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, + "databricks/databricks-gemini-2-5-flash": { + "input_cost_per_token": 3.0001999999999996e-07, + "input_dbu_cost_per_token": 4.285999999999999e-06, + "litellm_provider": "databricks", + "max_input_tokens": 1048576, + "max_output_tokens": 65535, + "max_tokens": 65535, + "metadata": { + "notes": "Input/output cost per token is dbu cost * $0.070. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." + }, + "mode": "chat", + "output_cost_per_token": 2.49998e-06, + "output_dbu_cost_per_token": 3.5714e-05, + "source": "https://www.databricks.com/product/pricing/proprietary-foundation-model-serving", + "supports_function_calling": true, + "supports_tool_choice": true + }, + "databricks/databricks-gemini-2-5-pro": { + "input_cost_per_token": 1.24999e-06, + "input_dbu_cost_per_token": 1.7857e-05, + "litellm_provider": "databricks", + "max_input_tokens": 1048576, + "max_output_tokens": 65536, + "max_tokens": 65536, + "metadata": { + "notes": "Input/output cost per token is dbu cost * $0.070. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." 
+ }, + "mode": "chat", + "output_cost_per_token": 9.999990000000002e-06, + "output_dbu_cost_per_token": 0.000142857, + "source": "https://www.databricks.com/product/pricing/proprietary-foundation-model-serving", + "supports_function_calling": true, + "supports_tool_choice": true + }, + "databricks/databricks-gemma-3-12b": { + "input_cost_per_token": 1.5000999999999998e-07, + "input_dbu_cost_per_token": 2.1429999999999996e-06, + "litellm_provider": "databricks", + "max_input_tokens": 128000, + "max_output_tokens": 32000, + "max_tokens": 32000, + "metadata": { + "notes": "Input/output cost per token is dbu cost * $0.070. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." + }, + "mode": "chat", + "output_cost_per_token": 5.0001e-07, + "output_dbu_cost_per_token": 7.143e-06, + "source": "https://www.databricks.com/product/pricing/foundation-model-serving" + }, + "databricks/databricks-gpt-5": { + "input_cost_per_token": 1.24999e-06, + "input_dbu_cost_per_token": 1.7857e-05, + "litellm_provider": "databricks", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "metadata": { + "notes": "Input/output cost per token is dbu cost * $0.070. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." + }, + "mode": "chat", + "output_cost_per_token": 9.999990000000002e-06, + "output_dbu_cost_per_token": 0.000142857, + "source": "https://www.databricks.com/product/pricing/proprietary-foundation-model-serving" + }, + "databricks/databricks-gpt-5-1": { + "input_cost_per_token": 1.24999e-06, + "input_dbu_cost_per_token": 1.7857e-05, + "litellm_provider": "databricks", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "metadata": { + "notes": "Input/output cost per token is dbu cost * $0.070. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." 
+ }, + "mode": "chat", + "output_cost_per_token": 9.999990000000002e-06, + "output_dbu_cost_per_token": 0.000142857, + "source": "https://www.databricks.com/product/pricing/proprietary-foundation-model-serving" + }, + "databricks/databricks-gpt-5-mini": { + "input_cost_per_token": 2.4997000000000006e-07, + "input_dbu_cost_per_token": 3.571e-06, + "litellm_provider": "databricks", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "metadata": { + "notes": "Input/output cost per token is dbu cost * $0.070. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." + }, + "mode": "chat", + "output_cost_per_token": 1.9999700000000004e-06, + "output_dbu_cost_per_token": 2.8571e-05, + "source": "https://www.databricks.com/product/pricing/proprietary-foundation-model-serving" + }, + "databricks/databricks-gpt-5-nano": { + "input_cost_per_token": 4.998e-08, + "input_dbu_cost_per_token": 7.14e-07, + "litellm_provider": "databricks", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "metadata": { + "notes": "Input/output cost per token is dbu cost * $0.070. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." + }, + "mode": "chat", + "output_cost_per_token": 3.9998000000000007e-07, + "output_dbu_cost_per_token": 5.714000000000001e-06, + "source": "https://www.databricks.com/product/pricing/proprietary-foundation-model-serving" + }, + "databricks/databricks-gpt-oss-120b": { + "input_cost_per_token": 1.5000999999999998e-07, + "input_dbu_cost_per_token": 2.1429999999999996e-06, + "litellm_provider": "databricks", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "metadata": { + "notes": "Input/output cost per token is dbu cost * $0.070. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." 
+ }, + "mode": "chat", + "output_cost_per_token": 5.9997e-07, + "output_dbu_cost_per_token": 8.571e-06, + "source": "https://www.databricks.com/product/pricing/foundation-model-serving" + }, + "databricks/databricks-gpt-oss-20b": { + "input_cost_per_token": 7e-08, + "input_dbu_cost_per_token": 1e-06, + "litellm_provider": "databricks", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "metadata": { + "notes": "Input/output cost per token is dbu cost * $0.070. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." + }, + "mode": "chat", + "output_cost_per_token": 3.0001999999999996e-07, + "output_dbu_cost_per_token": 4.285999999999999e-06, + "source": "https://www.databricks.com/product/pricing/foundation-model-serving" + }, + "databricks/databricks-gte-large-en": { + "input_cost_per_token": 1.2999000000000001e-07, + "input_dbu_cost_per_token": 1.857e-06, + "litellm_provider": "databricks", + "max_input_tokens": 8192, + "max_tokens": 8192, + "metadata": { + "notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." + }, + "mode": "embedding", + "output_cost_per_token": 0.0, + "output_dbu_cost_per_token": 0.0, + "output_vector_size": 1024, + "source": "https://www.databricks.com/product/pricing/foundation-model-serving" + }, + "databricks/databricks-llama-2-70b-chat": { + "input_cost_per_token": 5.0001e-07, + "input_dbu_cost_per_token": 7.143e-06, + "litellm_provider": "databricks", + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_tokens": 4096, + "metadata": { + "notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." 
+ }, + "mode": "chat", + "output_cost_per_token": 1.5000300000000002e-06, + "output_dbu_cost_per_token": 2.1429e-05, + "source": "https://www.databricks.com/product/pricing/foundation-model-serving", + "supports_tool_choice": true + }, + "databricks/databricks-llama-4-maverick": { + "input_cost_per_token": 5.0001e-07, + "input_dbu_cost_per_token": 7.143e-06, + "litellm_provider": "databricks", + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "metadata": { + "notes": "Databricks documentation now provides both DBU costs (_dbu_cost_per_token) and dollar costs(_cost_per_token)." + }, + "mode": "chat", + "output_cost_per_token": 1.5000300000000002e-06, + "output_dbu_cost_per_token": 2.1429e-05, + "source": "https://www.databricks.com/product/pricing/foundation-model-serving", + "supports_tool_choice": true + }, + "databricks/databricks-meta-llama-3-1-405b-instruct": { + "input_cost_per_token": 5.00003e-06, + "input_dbu_cost_per_token": 7.1429e-05, + "litellm_provider": "databricks", + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "metadata": { + "notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." + }, + "mode": "chat", + "output_cost_per_token": 1.5000020000000002e-05, + "output_dbu_cost_per_token": 0.000214286, + "source": "https://www.databricks.com/product/pricing/foundation-model-serving", + "supports_tool_choice": true + }, + "databricks/databricks-meta-llama-3-1-8b-instruct": { + "input_cost_per_token": 1.5000999999999998e-07, + "input_dbu_cost_per_token": 2.1429999999999996e-06, + "litellm_provider": "databricks", + "max_input_tokens": 200000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "metadata": { + "notes": "Input/output cost per token is dbu cost * $0.070. 
Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." + }, + "mode": "chat", + "output_cost_per_token": 4.5003000000000007e-07, + "output_dbu_cost_per_token": 6.429000000000001e-06, + "source": "https://www.databricks.com/product/pricing/foundation-model-serving" + }, + "databricks/databricks-meta-llama-3-3-70b-instruct": { + "input_cost_per_token": 5.0001e-07, + "input_dbu_cost_per_token": 7.143e-06, + "litellm_provider": "databricks", + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "metadata": { + "notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." + }, + "mode": "chat", + "output_cost_per_token": 1.5000300000000002e-06, + "output_dbu_cost_per_token": 2.1429e-05, + "source": "https://www.databricks.com/product/pricing/foundation-model-serving", + "supports_tool_choice": true + }, + "databricks/databricks-meta-llama-3-70b-instruct": { + "input_cost_per_token": 1.00002e-06, + "input_dbu_cost_per_token": 1.4286e-05, + "litellm_provider": "databricks", + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "metadata": { + "notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." 
+ }, + "mode": "chat", + "output_cost_per_token": 2.9999900000000002e-06, + "output_dbu_cost_per_token": 4.2857e-05, + "source": "https://www.databricks.com/product/pricing/foundation-model-serving", + "supports_tool_choice": true + }, + "databricks/databricks-mixtral-8x7b-instruct": { + "input_cost_per_token": 5.0001e-07, + "input_dbu_cost_per_token": 7.143e-06, + "litellm_provider": "databricks", + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_tokens": 4096, + "metadata": { + "notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." + }, + "mode": "chat", + "output_cost_per_token": 1.00002e-06, + "output_dbu_cost_per_token": 1.4286e-05, + "source": "https://www.databricks.com/product/pricing/foundation-model-serving", + "supports_tool_choice": true + }, + "databricks/databricks-mpt-30b-instruct": { + "input_cost_per_token": 1.00002e-06, + "input_dbu_cost_per_token": 1.4286e-05, + "litellm_provider": "databricks", + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "max_tokens": 8192, + "metadata": { + "notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." + }, + "mode": "chat", + "output_cost_per_token": 1.00002e-06, + "output_dbu_cost_per_token": 1.4286e-05, + "source": "https://www.databricks.com/product/pricing/foundation-model-serving", + "supports_tool_choice": true + }, + "databricks/databricks-mpt-7b-instruct": { + "input_cost_per_token": 5.0001e-07, + "input_dbu_cost_per_token": 7.143e-06, + "litellm_provider": "databricks", + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "max_tokens": 8192, + "metadata": { + "notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. 
Number provided for reference, '*_dbu_cost_per_token' used in actual calculation." + }, + "mode": "chat", + "output_cost_per_token": 0.0, + "output_dbu_cost_per_token": 0.0, + "source": "https://www.databricks.com/product/pricing/foundation-model-serving", + "supports_tool_choice": true + }, + "dataforseo/search": { + "input_cost_per_query": 0.003, + "litellm_provider": "dataforseo", + "mode": "search" + }, + "davinci-002": { + "input_cost_per_token": 2e-06, + "litellm_provider": "text-completion-openai", + "max_input_tokens": 16384, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "completion", + "output_cost_per_token": 2e-06 + }, + "deepgram/base": { + "input_cost_per_second": 0.00020833, + "litellm_provider": "deepgram", + "metadata": { + "calculation": "$0.0125/60 seconds = $0.00020833 per second", + "original_pricing_per_minute": 0.0125 + }, + "mode": "audio_transcription", + "output_cost_per_second": 0.0, + "source": "https://deepgram.com/pricing", + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] + }, + "deepgram/base-conversationalai": { + "input_cost_per_second": 0.00020833, + "litellm_provider": "deepgram", + "metadata": { + "calculation": "$0.0125/60 seconds = $0.00020833 per second", + "original_pricing_per_minute": 0.0125 + }, + "mode": "audio_transcription", + "output_cost_per_second": 0.0, + "source": "https://deepgram.com/pricing", + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] + }, + "deepgram/base-finance": { + "input_cost_per_second": 0.00020833, + "litellm_provider": "deepgram", + "metadata": { + "calculation": "$0.0125/60 seconds = $0.00020833 per second", + "original_pricing_per_minute": 0.0125 + }, + "mode": "audio_transcription", + "output_cost_per_second": 0.0, + "source": "https://deepgram.com/pricing", + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] + }, + "deepgram/base-general": { + "input_cost_per_second": 0.00020833, + "litellm_provider": "deepgram", + "metadata": { + "calculation": 
"$0.0125/60 seconds = $0.00020833 per second", + "original_pricing_per_minute": 0.0125 + }, + "mode": "audio_transcription", + "output_cost_per_second": 0.0, + "source": "https://deepgram.com/pricing", + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] + }, + "deepgram/base-meeting": { + "input_cost_per_second": 0.00020833, + "litellm_provider": "deepgram", + "metadata": { + "calculation": "$0.0125/60 seconds = $0.00020833 per second", + "original_pricing_per_minute": 0.0125 + }, + "mode": "audio_transcription", + "output_cost_per_second": 0.0, + "source": "https://deepgram.com/pricing", + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] + }, + "deepgram/base-phonecall": { + "input_cost_per_second": 0.00020833, + "litellm_provider": "deepgram", + "metadata": { + "calculation": "$0.0125/60 seconds = $0.00020833 per second", + "original_pricing_per_minute": 0.0125 + }, + "mode": "audio_transcription", + "output_cost_per_second": 0.0, + "source": "https://deepgram.com/pricing", + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] + }, + "deepgram/base-video": { + "input_cost_per_second": 0.00020833, + "litellm_provider": "deepgram", + "metadata": { + "calculation": "$0.0125/60 seconds = $0.00020833 per second", + "original_pricing_per_minute": 0.0125 + }, + "mode": "audio_transcription", + "output_cost_per_second": 0.0, + "source": "https://deepgram.com/pricing", + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] + }, + "deepgram/base-voicemail": { + "input_cost_per_second": 0.00020833, + "litellm_provider": "deepgram", + "metadata": { + "calculation": "$0.0125/60 seconds = $0.00020833 per second", + "original_pricing_per_minute": 0.0125 + }, + "mode": "audio_transcription", + "output_cost_per_second": 0.0, + "source": "https://deepgram.com/pricing", + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] + }, + "deepgram/enhanced": { + "input_cost_per_second": 0.00024167, + "litellm_provider": "deepgram", + "metadata": { + 
"calculation": "$0.0145/60 seconds = $0.00024167 per second", + "original_pricing_per_minute": 0.0145 + }, + "mode": "audio_transcription", + "output_cost_per_second": 0.0, + "source": "https://deepgram.com/pricing", + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] + }, + "deepgram/enhanced-finance": { + "input_cost_per_second": 0.00024167, + "litellm_provider": "deepgram", + "metadata": { + "calculation": "$0.0145/60 seconds = $0.00024167 per second", + "original_pricing_per_minute": 0.0145 + }, + "mode": "audio_transcription", + "output_cost_per_second": 0.0, + "source": "https://deepgram.com/pricing", + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] + }, + "deepgram/enhanced-general": { + "input_cost_per_second": 0.00024167, + "litellm_provider": "deepgram", + "metadata": { + "calculation": "$0.0145/60 seconds = $0.00024167 per second", + "original_pricing_per_minute": 0.0145 + }, + "mode": "audio_transcription", + "output_cost_per_second": 0.0, + "source": "https://deepgram.com/pricing", + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] + }, + "deepgram/enhanced-meeting": { + "input_cost_per_second": 0.00024167, + "litellm_provider": "deepgram", + "metadata": { + "calculation": "$0.0145/60 seconds = $0.00024167 per second", + "original_pricing_per_minute": 0.0145 + }, + "mode": "audio_transcription", + "output_cost_per_second": 0.0, + "source": "https://deepgram.com/pricing", + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] + }, + "deepgram/enhanced-phonecall": { + "input_cost_per_second": 0.00024167, + "litellm_provider": "deepgram", + "metadata": { + "calculation": "$0.0145/60 seconds = $0.00024167 per second", + "original_pricing_per_minute": 0.0145 + }, + "mode": "audio_transcription", + "output_cost_per_second": 0.0, + "source": "https://deepgram.com/pricing", + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] + }, + "deepgram/nova": { + "input_cost_per_second": 7.167e-05, + "litellm_provider": 
"deepgram", + "metadata": { + "calculation": "$0.0043/60 seconds = $0.00007167 per second", + "original_pricing_per_minute": 0.0043 + }, + "mode": "audio_transcription", + "output_cost_per_second": 0.0, + "source": "https://deepgram.com/pricing", + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] + }, + "deepgram/nova-2": { + "input_cost_per_second": 7.167e-05, + "litellm_provider": "deepgram", + "metadata": { + "calculation": "$0.0043/60 seconds = $0.00007167 per second", + "original_pricing_per_minute": 0.0043 + }, + "mode": "audio_transcription", + "output_cost_per_second": 0.0, + "source": "https://deepgram.com/pricing", + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] + }, + "deepgram/nova-2-atc": { + "input_cost_per_second": 7.167e-05, + "litellm_provider": "deepgram", + "metadata": { + "calculation": "$0.0043/60 seconds = $0.00007167 per second", + "original_pricing_per_minute": 0.0043 + }, + "mode": "audio_transcription", + "output_cost_per_second": 0.0, + "source": "https://deepgram.com/pricing", + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] + }, + "deepgram/nova-2-automotive": { + "input_cost_per_second": 7.167e-05, + "litellm_provider": "deepgram", + "metadata": { + "calculation": "$0.0043/60 seconds = $0.00007167 per second", + "original_pricing_per_minute": 0.0043 + }, + "mode": "audio_transcription", + "output_cost_per_second": 0.0, + "source": "https://deepgram.com/pricing", + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] + }, + "deepgram/nova-2-conversationalai": { + "input_cost_per_second": 7.167e-05, + "litellm_provider": "deepgram", + "metadata": { + "calculation": "$0.0043/60 seconds = $0.00007167 per second", + "original_pricing_per_minute": 0.0043 + }, + "mode": "audio_transcription", + "output_cost_per_second": 0.0, + "source": "https://deepgram.com/pricing", + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] + }, + "deepgram/nova-2-drivethru": { + "input_cost_per_second": 7.167e-05, + 
"litellm_provider": "deepgram", + "metadata": { + "calculation": "$0.0043/60 seconds = $0.00007167 per second", + "original_pricing_per_minute": 0.0043 + }, + "mode": "audio_transcription", + "output_cost_per_second": 0.0, + "source": "https://deepgram.com/pricing", + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] + }, + "deepgram/nova-2-finance": { + "input_cost_per_second": 7.167e-05, + "litellm_provider": "deepgram", + "metadata": { + "calculation": "$0.0043/60 seconds = $0.00007167 per second", + "original_pricing_per_minute": 0.0043 + }, + "mode": "audio_transcription", + "output_cost_per_second": 0.0, + "source": "https://deepgram.com/pricing", + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] + }, + "deepgram/nova-2-general": { + "input_cost_per_second": 7.167e-05, + "litellm_provider": "deepgram", + "metadata": { + "calculation": "$0.0043/60 seconds = $0.00007167 per second", + "original_pricing_per_minute": 0.0043 + }, + "mode": "audio_transcription", + "output_cost_per_second": 0.0, + "source": "https://deepgram.com/pricing", + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] + }, + "deepgram/nova-2-meeting": { + "input_cost_per_second": 7.167e-05, + "litellm_provider": "deepgram", + "metadata": { + "calculation": "$0.0043/60 seconds = $0.00007167 per second", + "original_pricing_per_minute": 0.0043 + }, + "mode": "audio_transcription", + "output_cost_per_second": 0.0, + "source": "https://deepgram.com/pricing", + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] + }, + "deepgram/nova-2-phonecall": { + "input_cost_per_second": 7.167e-05, + "litellm_provider": "deepgram", + "metadata": { + "calculation": "$0.0043/60 seconds = $0.00007167 per second", + "original_pricing_per_minute": 0.0043 + }, + "mode": "audio_transcription", + "output_cost_per_second": 0.0, + "source": "https://deepgram.com/pricing", + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] + }, + "deepgram/nova-2-video": { + 
"input_cost_per_second": 7.167e-05, + "litellm_provider": "deepgram", + "metadata": { + "calculation": "$0.0043/60 seconds = $0.00007167 per second", + "original_pricing_per_minute": 0.0043 + }, + "mode": "audio_transcription", + "output_cost_per_second": 0.0, + "source": "https://deepgram.com/pricing", + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] + }, + "deepgram/nova-2-voicemail": { + "input_cost_per_second": 7.167e-05, + "litellm_provider": "deepgram", + "metadata": { + "calculation": "$0.0043/60 seconds = $0.00007167 per second", + "original_pricing_per_minute": 0.0043 + }, + "mode": "audio_transcription", + "output_cost_per_second": 0.0, + "source": "https://deepgram.com/pricing", + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] + }, + "deepgram/nova-3": { + "input_cost_per_second": 7.167e-05, + "litellm_provider": "deepgram", + "metadata": { + "calculation": "$0.0043/60 seconds = $0.00007167 per second", + "original_pricing_per_minute": 0.0043 + }, + "mode": "audio_transcription", + "output_cost_per_second": 0.0, + "source": "https://deepgram.com/pricing", + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] + }, + "deepgram/nova-3-general": { + "input_cost_per_second": 7.167e-05, + "litellm_provider": "deepgram", + "metadata": { + "calculation": "$0.0043/60 seconds = $0.00007167 per second", + "original_pricing_per_minute": 0.0043 + }, + "mode": "audio_transcription", + "output_cost_per_second": 0.0, + "source": "https://deepgram.com/pricing", + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] + }, + "deepgram/nova-3-medical": { + "input_cost_per_second": 8.667e-05, + "litellm_provider": "deepgram", + "metadata": { + "calculation": "$0.0052/60 seconds = $0.00008667 per second (multilingual)", + "original_pricing_per_minute": 0.0052 + }, + "mode": "audio_transcription", + "output_cost_per_second": 0.0, + "source": "https://deepgram.com/pricing", + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] + }, + 
"deepgram/nova-general": { + "input_cost_per_second": 7.167e-05, + "litellm_provider": "deepgram", + "metadata": { + "calculation": "$0.0043/60 seconds = $0.00007167 per second", + "original_pricing_per_minute": 0.0043 + }, + "mode": "audio_transcription", + "output_cost_per_second": 0.0, + "source": "https://deepgram.com/pricing", + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] + }, + "deepgram/nova-phonecall": { + "input_cost_per_second": 7.167e-05, + "litellm_provider": "deepgram", + "metadata": { + "calculation": "$0.0043/60 seconds = $0.00007167 per second", + "original_pricing_per_minute": 0.0043 + }, + "mode": "audio_transcription", + "output_cost_per_second": 0.0, + "source": "https://deepgram.com/pricing", + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] + }, + "deepgram/whisper": { + "input_cost_per_second": 0.0001, + "litellm_provider": "deepgram", + "metadata": { + "notes": "Deepgram's hosted OpenAI Whisper models - pricing may differ from native Deepgram models" + }, + "mode": "audio_transcription", + "output_cost_per_second": 0.0, + "source": "https://deepgram.com/pricing", + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] + }, + "deepgram/whisper-base": { + "input_cost_per_second": 0.0001, + "litellm_provider": "deepgram", + "metadata": { + "notes": "Deepgram's hosted OpenAI Whisper models - pricing may differ from native Deepgram models" + }, + "mode": "audio_transcription", + "output_cost_per_second": 0.0, + "source": "https://deepgram.com/pricing", + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] + }, + "deepgram/whisper-large": { + "input_cost_per_second": 0.0001, + "litellm_provider": "deepgram", + "metadata": { + "notes": "Deepgram's hosted OpenAI Whisper models - pricing may differ from native Deepgram models" + }, + "mode": "audio_transcription", + "output_cost_per_second": 0.0, + "source": "https://deepgram.com/pricing", + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] + }, + 
"deepgram/whisper-medium": { + "input_cost_per_second": 0.0001, + "litellm_provider": "deepgram", + "metadata": { + "notes": "Deepgram's hosted OpenAI Whisper models - pricing may differ from native Deepgram models" + }, + "mode": "audio_transcription", + "output_cost_per_second": 0.0, + "source": "https://deepgram.com/pricing", + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] + }, + "deepgram/whisper-small": { + "input_cost_per_second": 0.0001, + "litellm_provider": "deepgram", + "metadata": { + "notes": "Deepgram's hosted OpenAI Whisper models - pricing may differ from native Deepgram models" + }, + "mode": "audio_transcription", + "output_cost_per_second": 0.0, + "source": "https://deepgram.com/pricing", + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] + }, + "deepgram/whisper-tiny": { + "input_cost_per_second": 0.0001, + "litellm_provider": "deepgram", + "metadata": { + "notes": "Deepgram's hosted OpenAI Whisper models - pricing may differ from native Deepgram models" + }, + "mode": "audio_transcription", + "output_cost_per_second": 0.0, + "source": "https://deepgram.com/pricing", + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] + }, + "deepinfra/Gryphe/MythoMax-L2-13b": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 8e-08, + "output_cost_per_token": 9e-08, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": true, + "supports_function_calling": true + }, + "deepinfra/NousResearch/Hermes-3-Llama-3.1-405B": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 1e-06, + "output_cost_per_token": 1e-06, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": true, + "supports_function_calling": true + }, + "deepinfra/NousResearch/Hermes-3-Llama-3.1-70B": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 
3e-07, + "output_cost_per_token": 3e-07, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": false + }, + "deepinfra/Qwen/QwQ-32B": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 1.5e-07, + "output_cost_per_token": 4e-07, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": true, + "supports_function_calling": true + }, + "deepinfra/Qwen/Qwen2.5-72B-Instruct": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "input_cost_per_token": 1.2e-07, + "output_cost_per_token": 3.9e-07, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": true, + "supports_function_calling": true + }, + "deepinfra/Qwen/Qwen2.5-7B-Instruct": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "input_cost_per_token": 4e-08, + "output_cost_per_token": 1e-07, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": false + }, + "deepinfra/Qwen/Qwen2.5-VL-32B-Instruct": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "input_cost_per_token": 2e-07, + "output_cost_per_token": 6e-07, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": true, + "supports_vision": true, + "supports_function_calling": true + }, + "deepinfra/Qwen/Qwen3-14B": { + "max_tokens": 40960, + "max_input_tokens": 40960, + "max_output_tokens": 40960, + "input_cost_per_token": 6e-08, + "output_cost_per_token": 2.4e-07, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": true, + "supports_function_calling": true + }, + "deepinfra/Qwen/Qwen3-235B-A22B": { + "max_tokens": 40960, + "max_input_tokens": 40960, + "max_output_tokens": 40960, + "input_cost_per_token": 1.8e-07, + "output_cost_per_token": 5.4e-07, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": true, + 
"supports_function_calling": true + }, + "deepinfra/Qwen/Qwen3-235B-A22B-Instruct-2507": { + "max_tokens": 262144, + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "input_cost_per_token": 9e-08, + "output_cost_per_token": 6e-07, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": true, + "supports_function_calling": true + }, + "deepinfra/Qwen/Qwen3-235B-A22B-Thinking-2507": { + "max_tokens": 262144, + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "input_cost_per_token": 3e-07, + "output_cost_per_token": 2.9e-06, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": true, + "supports_function_calling": true + }, + "deepinfra/Qwen/Qwen3-30B-A3B": { + "max_tokens": 40960, + "max_input_tokens": 40960, + "max_output_tokens": 40960, + "input_cost_per_token": 8e-08, + "output_cost_per_token": 2.9e-07, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": true, + "supports_function_calling": true + }, + "deepinfra/Qwen/Qwen3-32B": { + "max_tokens": 40960, + "max_input_tokens": 40960, + "max_output_tokens": 40960, + "input_cost_per_token": 1e-07, + "output_cost_per_token": 2.8e-07, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": true, + "supports_function_calling": true + }, + "deepinfra/Qwen/Qwen3-Coder-480B-A35B-Instruct": { + "max_tokens": 262144, + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "input_cost_per_token": 4e-07, + "output_cost_per_token": 1.6e-06, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": true, + "supports_function_calling": true + }, + "deepinfra/Qwen/Qwen3-Coder-480B-A35B-Instruct-Turbo": { + "max_tokens": 262144, + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "input_cost_per_token": 2.9e-07, + "output_cost_per_token": 1.2e-06, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": true, + "supports_function_calling": true + }, + 
"deepinfra/Qwen/Qwen3-Next-80B-A3B-Instruct": { + "max_tokens": 262144, + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "input_cost_per_token": 1.4e-07, + "output_cost_per_token": 1.4e-06, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": true, + "supports_function_calling": true + }, + "deepinfra/Qwen/Qwen3-Next-80B-A3B-Thinking": { + "max_tokens": 262144, + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "input_cost_per_token": 1.4e-07, + "output_cost_per_token": 1.4e-06, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": true, + "supports_function_calling": true + }, + "deepinfra/Sao10K/L3-8B-Lunaris-v1-Turbo": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 4e-08, + "output_cost_per_token": 5e-08, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": false + }, + "deepinfra/Sao10K/L3.1-70B-Euryale-v2.2": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 6.5e-07, + "output_cost_per_token": 7.5e-07, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": false + }, + "deepinfra/Sao10K/L3.3-70B-Euryale-v2.3": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 6.5e-07, + "output_cost_per_token": 7.5e-07, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": false + }, + "deepinfra/allenai/olmOCR-7B-0725-FP8": { + "max_tokens": 16384, + "max_input_tokens": 16384, + "max_output_tokens": 16384, + "input_cost_per_token": 2.7e-07, + "output_cost_per_token": 1.5e-06, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": false + }, + "deepinfra/anthropic/claude-3-7-sonnet-latest": { + "max_tokens": 200000, + "max_input_tokens": 200000, + "max_output_tokens": 200000, + "input_cost_per_token": 3.3e-06, + 
"output_cost_per_token": 1.65e-05, + "cache_read_input_token_cost": 3.3e-07, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": true, + "supports_function_calling": true + }, + "deepinfra/anthropic/claude-4-opus": { + "max_tokens": 200000, + "max_input_tokens": 200000, + "max_output_tokens": 200000, + "input_cost_per_token": 1.65e-05, + "output_cost_per_token": 8.25e-05, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": true, + "supports_function_calling": true + }, + "deepinfra/anthropic/claude-4-sonnet": { + "max_tokens": 200000, + "max_input_tokens": 200000, + "max_output_tokens": 200000, + "input_cost_per_token": 3.3e-06, + "output_cost_per_token": 1.65e-05, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": true, + "supports_function_calling": true + }, + "deepinfra/deepseek-ai/DeepSeek-R1": { + "max_tokens": 163840, + "max_input_tokens": 163840, + "max_output_tokens": 163840, + "input_cost_per_token": 7e-07, + "output_cost_per_token": 2.4e-06, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": true, + "supports_function_calling": true + }, + "deepinfra/deepseek-ai/DeepSeek-R1-0528": { + "max_tokens": 163840, + "max_input_tokens": 163840, + "max_output_tokens": 163840, + "input_cost_per_token": 5e-07, + "output_cost_per_token": 2.15e-06, + "cache_read_input_token_cost": 4e-07, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": true, + "supports_function_calling": true + }, + "deepinfra/deepseek-ai/DeepSeek-R1-0528-Turbo": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "input_cost_per_token": 1e-06, + "output_cost_per_token": 3e-06, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": true, + "supports_function_calling": true + }, + "deepinfra/deepseek-ai/DeepSeek-R1-Distill-Llama-70B": { + "max_tokens": 131072, + "max_input_tokens": 131072, + 
"max_output_tokens": 131072, + "input_cost_per_token": 2e-07, + "output_cost_per_token": 6e-07, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": false + }, + "deepinfra/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 2.7e-07, + "output_cost_per_token": 2.7e-07, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": true, + "supports_function_calling": true + }, + "deepinfra/deepseek-ai/DeepSeek-R1-Turbo": { + "max_tokens": 40960, + "max_input_tokens": 40960, + "max_output_tokens": 40960, + "input_cost_per_token": 1e-06, + "output_cost_per_token": 3e-06, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": true, + "supports_function_calling": true + }, + "deepinfra/deepseek-ai/DeepSeek-V3": { + "max_tokens": 163840, + "max_input_tokens": 163840, + "max_output_tokens": 163840, + "input_cost_per_token": 3.8e-07, + "output_cost_per_token": 8.9e-07, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": true, + "supports_function_calling": true + }, + "deepinfra/deepseek-ai/DeepSeek-V3-0324": { + "max_tokens": 163840, + "max_input_tokens": 163840, + "max_output_tokens": 163840, + "input_cost_per_token": 2.5e-07, + "output_cost_per_token": 8.8e-07, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": true, + "supports_function_calling": true + }, + "deepinfra/deepseek-ai/DeepSeek-V3.1": { + "max_tokens": 163840, + "max_input_tokens": 163840, + "max_output_tokens": 163840, + "input_cost_per_token": 2.7e-07, + "output_cost_per_token": 1e-06, + "cache_read_input_token_cost": 2.16e-07, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": true, + "supports_reasoning": true, + "supports_function_calling": true + }, + "deepinfra/deepseek-ai/DeepSeek-V3.1-Terminus": { + "max_tokens": 163840, + "max_input_tokens": 
163840, + "max_output_tokens": 163840, + "input_cost_per_token": 2.7e-07, + "output_cost_per_token": 1e-06, + "cache_read_input_token_cost": 2.16e-07, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": true, + "supports_function_calling": true + }, + "deepinfra/google/gemini-2.0-flash-001": { + "deprecation_date": "2026-06-01", + "max_tokens": 1000000, + "max_input_tokens": 1000000, + "max_output_tokens": 1000000, + "input_cost_per_token": 1e-07, + "output_cost_per_token": 4e-07, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": true, + "supports_function_calling": true + }, + "deepinfra/google/gemini-2.5-flash": { + "max_tokens": 1000000, + "max_input_tokens": 1000000, + "max_output_tokens": 1000000, + "input_cost_per_token": 3e-07, + "output_cost_per_token": 2.5e-06, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": true, + "supports_function_calling": true + }, + "deepinfra/google/gemini-2.5-pro": { + "max_tokens": 1000000, + "max_input_tokens": 1000000, + "max_output_tokens": 1000000, + "input_cost_per_token": 1.25e-06, + "output_cost_per_token": 1e-05, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": true, + "supports_function_calling": true + }, + "deepinfra/google/gemma-3-12b-it": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 5e-08, + "output_cost_per_token": 1e-07, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": true, + "supports_function_calling": true + }, + "deepinfra/google/gemma-3-27b-it": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 9e-08, + "output_cost_per_token": 1.6e-07, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": true, + "supports_function_calling": true + }, + "deepinfra/google/gemma-3-4b-it": { + "max_tokens": 131072, + 
"max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 4e-08, + "output_cost_per_token": 8e-08, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": true, + "supports_function_calling": true + }, + "deepinfra/meta-llama/Llama-3.2-11B-Vision-Instruct": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 4.9e-08, + "output_cost_per_token": 4.9e-08, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": false + }, + "deepinfra/meta-llama/Llama-3.2-3B-Instruct": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 2e-08, + "output_cost_per_token": 2e-08, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": true, + "supports_function_calling": true + }, + "deepinfra/meta-llama/Llama-3.3-70B-Instruct": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 2.3e-07, + "output_cost_per_token": 4e-07, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": true, + "supports_function_calling": true + }, + "deepinfra/meta-llama/Llama-3.3-70B-Instruct-Turbo": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 1.3e-07, + "output_cost_per_token": 3.9e-07, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_function_calling": true, + "supports_tool_choice": true + }, + "deepinfra/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8": { + "max_tokens": 1048576, + "max_input_tokens": 1048576, + "max_output_tokens": 1048576, + "input_cost_per_token": 1.5e-07, + "output_cost_per_token": 6e-07, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": true, + "supports_function_calling": true + }, + "deepinfra/meta-llama/Llama-4-Scout-17B-16E-Instruct": { + "max_tokens": 327680, + 
"max_input_tokens": 327680, + "max_output_tokens": 327680, + "input_cost_per_token": 8e-08, + "output_cost_per_token": 3e-07, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": true, + "supports_function_calling": true + }, + "deepinfra/meta-llama/Llama-Guard-3-8B": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 5.5e-08, + "output_cost_per_token": 5.5e-08, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": false + }, + "deepinfra/meta-llama/Llama-Guard-4-12B": { + "max_tokens": 163840, + "max_input_tokens": 163840, + "max_output_tokens": 163840, + "input_cost_per_token": 1.8e-07, + "output_cost_per_token": 1.8e-07, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": false + }, + "deepinfra/meta-llama/Meta-Llama-3-8B-Instruct": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 3e-08, + "output_cost_per_token": 6e-08, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": true, + "supports_function_calling": true + }, + "deepinfra/meta-llama/Meta-Llama-3.1-70B-Instruct": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 4e-07, + "output_cost_per_token": 4e-07, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": true, + "supports_function_calling": true + }, + "deepinfra/meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 1e-07, + "output_cost_per_token": 2.8e-07, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": true, + "supports_function_calling": true + }, + "deepinfra/meta-llama/Meta-Llama-3.1-8B-Instruct": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + 
"input_cost_per_token": 3e-08, + "output_cost_per_token": 5e-08, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": true, + "supports_function_calling": true + }, + "deepinfra/meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 2e-08, + "output_cost_per_token": 3e-08, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": true, + "supports_function_calling": true + }, + "deepinfra/microsoft/WizardLM-2-8x22B": { + "max_tokens": 65536, + "max_input_tokens": 65536, + "max_output_tokens": 65536, + "input_cost_per_token": 4.8e-07, + "output_cost_per_token": 4.8e-07, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": false + }, + "deepinfra/microsoft/phi-4": { + "max_tokens": 16384, + "max_input_tokens": 16384, + "max_output_tokens": 16384, + "input_cost_per_token": 7e-08, + "output_cost_per_token": 1.4e-07, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": true, + "supports_function_calling": true + }, + "deepinfra/mistralai/Mistral-Nemo-Instruct-2407": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 2e-08, + "output_cost_per_token": 4e-08, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": true, + "supports_function_calling": true + }, + "deepinfra/mistralai/Mistral-Small-24B-Instruct-2501": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "input_cost_per_token": 5e-08, + "output_cost_per_token": 8e-08, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": true, + "supports_function_calling": true + }, + "deepinfra/mistralai/Mistral-Small-3.2-24B-Instruct-2506": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "input_cost_per_token": 7.5e-08, + 
"output_cost_per_token": 2e-07, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": true, + "supports_function_calling": true + }, + "deepinfra/mistralai/Mixtral-8x7B-Instruct-v0.1": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "input_cost_per_token": 4e-07, + "output_cost_per_token": 4e-07, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": true, + "supports_function_calling": true + }, + "deepinfra/moonshotai/Kimi-K2-Instruct": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 5e-07, + "output_cost_per_token": 2e-06, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": true, + "supports_function_calling": true + }, + "deepinfra/moonshotai/Kimi-K2-Instruct-0905": { + "max_tokens": 262144, + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "input_cost_per_token": 5e-07, + "output_cost_per_token": 2e-06, + "cache_read_input_token_cost": 4e-07, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": true, + "supports_function_calling": true + }, + "deepinfra/nvidia/Llama-3.1-Nemotron-70B-Instruct": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 6e-07, + "output_cost_per_token": 6e-07, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": true, + "supports_function_calling": true + }, + "deepinfra/nvidia/Llama-3.3-Nemotron-Super-49B-v1.5": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 1e-07, + "output_cost_per_token": 4e-07, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": true, + "supports_function_calling": true + }, + "deepinfra/nvidia/NVIDIA-Nemotron-Nano-9B-v2": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + 
"input_cost_per_token": 4e-08, + "output_cost_per_token": 1.6e-07, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": true, + "supports_function_calling": true + }, + "deepinfra/openai/gpt-oss-120b": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 5e-08, + "output_cost_per_token": 4.5e-07, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": true, + "supports_function_calling": true + }, + "deepinfra/openai/gpt-oss-20b": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 4e-08, + "output_cost_per_token": 1.5e-07, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": true, + "supports_function_calling": true + }, + "deepinfra/zai-org/GLM-4.5": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 4e-07, + "output_cost_per_token": 1.6e-06, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": true, + "supports_function_calling": true + }, + "deepseek/deepseek-chat": { + "cache_creation_input_token_cost": 0.0, + "cache_read_input_token_cost": 2.8e-08, + "input_cost_per_token": 2.8e-07, + "input_cost_per_token_cache_hit": 2.8e-08, + "litellm_provider": "deepseek", + "max_input_tokens": 131072, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 4.2e-07, + "source": "https://api-docs.deepseek.com/quick_start/pricing", + "supported_endpoints": [ + "/v1/chat/completions" + ], + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "deepseek/deepseek-coder": { + "input_cost_per_token": 1.4e-07, + 
"input_cost_per_token_cache_hit": 1.4e-08, + "litellm_provider": "deepseek", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 2.8e-07, + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_tool_choice": true + }, + "deepseek/deepseek-r1": { + "input_cost_per_token": 5.5e-07, + "input_cost_per_token_cache_hit": 1.4e-07, + "litellm_provider": "deepseek", + "max_input_tokens": 65536, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 2.19e-06, + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, + "deepseek/deepseek-reasoner": { + "cache_read_input_token_cost": 2.8e-08, + "input_cost_per_token": 2.8e-07, + "input_cost_per_token_cache_hit": 2.8e-08, + "litellm_provider": "deepseek", + "max_input_tokens": 131072, + "max_output_tokens": 65536, + "max_tokens": 65536, + "mode": "chat", + "output_cost_per_token": 4.2e-07, + "source": "https://api-docs.deepseek.com/quick_start/pricing", + "supported_endpoints": [ + "/v1/chat/completions" + ], + "supports_assistant_prefill": true, + "supports_function_calling": false, + "supports_native_streaming": true, + "supports_parallel_function_calling": false, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": false + }, + "deepseek/deepseek-v3": { + "cache_creation_input_token_cost": 0.0, + "cache_read_input_token_cost": 7e-08, + "input_cost_per_token": 2.7e-07, + "input_cost_per_token_cache_hit": 7e-08, + "litellm_provider": "deepseek", + "max_input_tokens": 65536, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 1.1e-06, + "supports_assistant_prefill": true, + 
"supports_function_calling": true, + "supports_prompt_caching": true, + "supports_tool_choice": true + }, + "deepseek/deepseek-v3.2": { + "input_cost_per_token": 2.8e-07, + "input_cost_per_token_cache_hit": 2.8e-08, + "litellm_provider": "deepseek", + "max_input_tokens": 163840, + "max_output_tokens": 163840, + "max_tokens": 163840, + "mode": "chat", + "output_cost_per_token": 4e-07, + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, + "deepseek.v3-v1:0": { + "input_cost_per_token": 5.8e-07, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 163840, + "max_output_tokens": 81920, + "max_tokens": 81920, + "mode": "chat", + "output_cost_per_token": 1.68e-06, + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true, + "supports_native_structured_output": true + }, + "deepseek.v3.2": { + "input_cost_per_token": 6.2e-07, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 163840, + "max_output_tokens": 163840, + "max_tokens": 163840, + "mode": "chat", + "output_cost_per_token": 1.85e-06, + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true, + "source": "https://aws.amazon.com/bedrock/pricing/" + }, + "dolphin": { + "input_cost_per_token": 5e-07, + "litellm_provider": "nlp_cloud", + "max_input_tokens": 16384, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "completion", + "output_cost_per_token": 5e-07 + }, + "deepseek-v3-2-251201": { + "input_cost_per_token": 0.0, + "litellm_provider": "volcengine", + "max_input_tokens": 98304, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 0.0, + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, + "glm-4-7-251222": { + 
"input_cost_per_token": 0.0, + "litellm_provider": "volcengine", + "max_input_tokens": 204800, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 0.0, + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, + "kimi-k2-thinking-251104": { + "input_cost_per_token": 0.0, + "litellm_provider": "volcengine", + "max_input_tokens": 229376, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 0.0, + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, + "doubao-embedding": { + "input_cost_per_token": 0.0, + "litellm_provider": "volcengine", + "max_input_tokens": 4096, + "max_tokens": 4096, + "metadata": { + "notes": "Volcengine Doubao embedding model - standard version with 2560 dimensions" + }, + "mode": "embedding", + "output_cost_per_token": 0.0, + "output_vector_size": 2560 + }, + "doubao-embedding-large": { + "input_cost_per_token": 0.0, + "litellm_provider": "volcengine", + "max_input_tokens": 4096, + "max_tokens": 4096, + "metadata": { + "notes": "Volcengine Doubao embedding model - large version with 2048 dimensions" + }, + "mode": "embedding", + "output_cost_per_token": 0.0, + "output_vector_size": 2048 + }, + "doubao-embedding-large-text-240915": { + "input_cost_per_token": 0.0, + "litellm_provider": "volcengine", + "max_input_tokens": 4096, + "max_tokens": 4096, + "metadata": { + "notes": "Volcengine Doubao embedding model - text-240915 version with 4096 dimensions" + }, + "mode": "embedding", + "output_cost_per_token": 0.0, + "output_vector_size": 4096 + }, + "doubao-embedding-large-text-250515": { + "input_cost_per_token": 0.0, + "litellm_provider": "volcengine", + "max_input_tokens": 4096, + "max_tokens": 4096, + "metadata": { + 
"notes": "Volcengine Doubao embedding model - text-250515 version with 2048 dimensions" + }, + "mode": "embedding", + "output_cost_per_token": 0.0, + "output_vector_size": 2048 + }, + "doubao-embedding-text-240715": { + "input_cost_per_token": 0.0, + "litellm_provider": "volcengine", + "max_input_tokens": 4096, + "max_tokens": 4096, + "metadata": { + "notes": "Volcengine Doubao embedding model - text-240715 version with 2560 dimensions" + }, + "mode": "embedding", + "output_cost_per_token": 0.0, + "output_vector_size": 2560 + }, + "exa_ai/search": { + "litellm_provider": "exa_ai", + "mode": "search", + "tiered_pricing": [ + { + "input_cost_per_query": 0.005, + "max_results_range": [ + 0, + 25 + ] + }, + { + "input_cost_per_query": 0.025, + "max_results_range": [ + 26, + 100 + ] + } + ] + }, + "firecrawl/search": { + "litellm_provider": "firecrawl", + "mode": "search", + "tiered_pricing": [ + { + "input_cost_per_query": 0.00166, + "max_results_range": [ + 1, + 10 + ] + }, + { + "input_cost_per_query": 0.00332, + "max_results_range": [ + 11, + 20 + ] + }, + { + "input_cost_per_query": 0.00498, + "max_results_range": [ + 21, + 30 + ] + }, + { + "input_cost_per_query": 0.00664, + "max_results_range": [ + 31, + 40 + ] + }, + { + "input_cost_per_query": 0.0083, + "max_results_range": [ + 41, + 50 + ] + }, + { + "input_cost_per_query": 0.00996, + "max_results_range": [ + 51, + 60 + ] + }, + { + "input_cost_per_query": 0.01162, + "max_results_range": [ + 61, + 70 + ] + }, + { + "input_cost_per_query": 0.01328, + "max_results_range": [ + 71, + 80 + ] + }, + { + "input_cost_per_query": 0.01494, + "max_results_range": [ + 81, + 90 + ] + }, + { + "input_cost_per_query": 0.0166, + "max_results_range": [ + 91, + 100 + ] + } + ], + "metadata": { + "notes": "Firecrawl search pricing: $83 for 100,000 credits, 2 credits per 10 results. 
Cost = ceiling(limit/10) * 2 * $0.00083" + } + }, + "perplexity/search": { + "input_cost_per_query": 0.005, + "litellm_provider": "perplexity", + "mode": "search" + }, + "searxng/search": { + "litellm_provider": "searxng", + "mode": "search", + "input_cost_per_query": 0.0, + "metadata": { + "notes": "SearXNG is an open-source metasearch engine. Free to use when self-hosted or using public instances." + } + }, + "serper/search": { + "input_cost_per_query": 0.001, + "litellm_provider": "serper", + "mode": "search", + "metadata": { + "notes": "Serper Google Search API. Pricing: $1.00/1k queries (Starter), $0.75/1k (Standard), $0.50/1k (Scale), $0.30/1k (Ultimate)." + } + }, + "elevenlabs/scribe_v1": { + "input_cost_per_second": 6.11e-05, + "litellm_provider": "elevenlabs", + "metadata": { + "calculation": "$0.22/hour = $0.00366/minute = $0.0000611 per second (enterprise pricing)", + "notes": "ElevenLabs Scribe v1 - state-of-the-art speech recognition model with 99 language support", + "original_pricing_per_hour": 0.22 + }, + "mode": "audio_transcription", + "output_cost_per_second": 0.0, + "source": "https://elevenlabs.io/pricing", + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] + }, + "elevenlabs/scribe_v1_experimental": { + "input_cost_per_second": 6.11e-05, + "litellm_provider": "elevenlabs", + "metadata": { + "calculation": "$0.22/hour = $0.00366/minute = $0.0000611 per second (enterprise pricing)", + "notes": "ElevenLabs Scribe v1 experimental - enhanced version of the main Scribe model", + "original_pricing_per_hour": 0.22 + }, + "mode": "audio_transcription", + "output_cost_per_second": 0.0, + "source": "https://elevenlabs.io/pricing", + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] + }, + "elevenlabs/eleven_v3": { + "input_cost_per_character": 0.00018, + "litellm_provider": "elevenlabs", + "metadata": { + "calculation": "$0.18/1000 characters (Scale plan pricing, 1 credit per character)", + "notes": "ElevenLabs Eleven v3 - most 
expressive TTS model with 70+ languages and audio tags support" + }, + "mode": "audio_speech", + "source": "https://elevenlabs.io/pricing", + "supported_endpoints": [ + "/v1/audio/speech" + ] + }, + "elevenlabs/eleven_multilingual_v2": { + "input_cost_per_character": 0.00018, + "litellm_provider": "elevenlabs", + "metadata": { + "calculation": "$0.18/1000 characters (Scale plan pricing, 1 credit per character)", + "notes": "ElevenLabs Eleven Multilingual v2 - default TTS model with 29 languages support" + }, + "mode": "audio_speech", + "source": "https://elevenlabs.io/pricing", + "supported_endpoints": [ + "/v1/audio/speech" + ] + }, + "embed-english-light-v2.0": { + "input_cost_per_token": 1e-07, + "litellm_provider": "cohere", + "max_input_tokens": 1024, + "max_tokens": 1024, + "mode": "embedding", + "output_cost_per_token": 0.0 + }, + "embed-english-light-v3.0": { + "input_cost_per_token": 1e-07, + "litellm_provider": "cohere", + "max_input_tokens": 1024, + "max_tokens": 1024, + "mode": "embedding", + "output_cost_per_token": 0.0 + }, + "embed-english-v2.0": { + "input_cost_per_token": 1e-07, + "litellm_provider": "cohere", + "max_input_tokens": 4096, + "max_tokens": 4096, + "mode": "embedding", + "output_cost_per_token": 0.0 + }, + "embed-english-v3.0": { + "input_cost_per_image": 0.0001, + "input_cost_per_token": 1e-07, + "litellm_provider": "cohere", + "max_input_tokens": 1024, + "max_tokens": 1024, + "metadata": { + "notes": "'supports_image_input' is a deprecated field. Use 'supports_embedding_image_input' instead." 
+ }, + "mode": "embedding", + "output_cost_per_token": 0.0, + "supports_embedding_image_input": true, + "supports_image_input": true + }, + "embed-multilingual-v2.0": { + "input_cost_per_token": 1e-07, + "litellm_provider": "cohere", + "max_input_tokens": 768, + "max_tokens": 768, + "mode": "embedding", + "output_cost_per_token": 0.0 + }, + "embed-multilingual-v3.0": { + "input_cost_per_token": 1e-07, + "litellm_provider": "cohere", + "max_input_tokens": 1024, + "max_tokens": 1024, + "mode": "embedding", + "output_cost_per_token": 0.0, + "supports_embedding_image_input": true + }, + "embed-multilingual-light-v3.0": { + "input_cost_per_token": 0.0001, + "litellm_provider": "cohere", + "max_input_tokens": 1024, + "max_tokens": 1024, + "mode": "embedding", + "output_cost_per_token": 0.0, + "supports_embedding_image_input": true + }, + "eu.amazon.nova-lite-v1:0": { + "input_cost_per_token": 7.8e-08, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 300000, + "max_output_tokens": 10000, + "max_tokens": 10000, + "mode": "chat", + "output_cost_per_token": 3.12e-07, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_vision": true + }, + "eu.amazon.nova-micro-v1:0": { + "input_cost_per_token": 4.6e-08, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 128000, + "max_output_tokens": 10000, + "max_tokens": 10000, + "mode": "chat", + "output_cost_per_token": 1.84e-07, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_response_schema": true + }, + "eu.amazon.nova-pro-v1:0": { + "input_cost_per_token": 1.05e-06, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 300000, + "max_output_tokens": 10000, + "max_tokens": 10000, + "mode": "chat", + "output_cost_per_token": 4.2e-06, + "source": "https://aws.amazon.com/bedrock/pricing/", + "supports_function_calling": true, + "supports_pdf_input": true, + 
"supports_prompt_caching": true, + "supports_response_schema": true, + "supports_vision": true + }, + "eu.anthropic.claude-3-5-haiku-20241022-v1:0": { + "input_cost_per_token": 2.5e-07, + "litellm_provider": "bedrock", + "max_input_tokens": 200000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 1.25e-06, + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "cache_read_input_token_cost": 2.5e-08, + "cache_creation_input_token_cost": 3.125e-07 + }, + "eu.anthropic.claude-haiku-4-5-20251001-v1:0": { + "cache_creation_input_token_cost": 1.375e-06, + "cache_read_input_token_cost": 1.1e-07, + "input_cost_per_token": 1.1e-06, + "deprecation_date": "2026-10-15", + "litellm_provider": "bedrock_converse", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 5.5e-06, + "source": "https://aws.amazon.com/about-aws/whats-new/2025/10/claude-4-5-haiku-anthropic-amazon-bedrock", + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 346, + "supports_native_structured_output": true + }, + "eu.anthropic.claude-3-5-sonnet-20240620-v1:0": { + "input_cost_per_token": 3e-06, + "litellm_provider": "bedrock", + "max_input_tokens": 200000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 1.5e-05, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "cache_read_input_token_cost": 3e-07, + 
"cache_creation_input_token_cost": 3.75e-06 + }, + "eu.anthropic.claude-3-5-sonnet-20241022-v2:0": { + "input_cost_per_token": 3e-06, + "litellm_provider": "bedrock", + "max_input_tokens": 200000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 1.5e-05, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "cache_read_input_token_cost": 3e-07, + "cache_creation_input_token_cost": 3.75e-06 + }, + "eu.anthropic.claude-3-7-sonnet-20250219-v1:0": { + "input_cost_per_token": 3e-06, + "litellm_provider": "bedrock", + "max_input_tokens": 200000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 1.5e-05, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "cache_read_input_token_cost": 3e-07, + "cache_creation_input_token_cost": 3.75e-06 + }, + "eu.anthropic.claude-3-haiku-20240307-v1:0": { + "input_cost_per_token": 2.5e-07, + "litellm_provider": "bedrock", + "max_input_tokens": 200000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 1.25e-06, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "cache_read_input_token_cost": 2.5e-08, + "cache_creation_input_token_cost": 3.125e-07 + }, + "eu.anthropic.claude-3-opus-20240229-v1:0": { + "input_cost_per_token": 1.5e-05, + "litellm_provider": "bedrock", + "max_input_tokens": 200000, + "max_output_tokens": 4096, + 
"max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 7.5e-05, + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "cache_read_input_token_cost": 1.5e-06, + "cache_creation_input_token_cost": 1.875e-05 + }, + "eu.anthropic.claude-3-sonnet-20240229-v1:0": { + "input_cost_per_token": 3e-06, + "litellm_provider": "bedrock", + "max_input_tokens": 200000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 1.5e-05, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "cache_read_input_token_cost": 3e-07, + "cache_creation_input_token_cost": 3.75e-06 + }, + "eu.anthropic.claude-opus-4-1-20250805-v1:0": { + "cache_creation_input_token_cost": 1.875e-05, + "cache_read_input_token_cost": 1.5e-06, + "input_cost_per_token": 1.5e-05, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 200000, + "max_output_tokens": 32000, + "max_tokens": 32000, + "mode": "chat", + "output_cost_per_token": 7.5e-05, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 159 + }, + "eu.anthropic.claude-opus-4-20250514-v1:0": { + "cache_creation_input_token_cost": 1.875e-05, + "cache_read_input_token_cost": 1.5e-06, + "input_cost_per_token": 1.5e-05, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 200000, + "max_output_tokens": 32000, + "max_tokens": 32000, + "mode": "chat", + "output_cost_per_token": 
7.5e-05, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 159 + }, + "eu.anthropic.claude-sonnet-4-20250514-v1:0": { + "cache_creation_input_token_cost": 3.75e-06, + "cache_read_input_token_cost": 3e-07, + "input_cost_per_token": 3e-06, + "input_cost_per_token_above_200k_tokens": 6e-06, + "output_cost_per_token_above_200k_tokens": 2.25e-05, + "cache_creation_input_token_cost_above_200k_tokens": 7.5e-06, + "cache_read_input_token_cost_above_200k_tokens": 6e-07, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 1000000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 1.5e-05, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 159 + }, + "eu.anthropic.claude-sonnet-4-5-20250929-v1:0": { + "cache_creation_input_token_cost": 4.125e-06, + "cache_read_input_token_cost": 3.3e-07, + "input_cost_per_token": 3.3e-06, + "input_cost_per_token_above_200k_tokens": 6.6e-06, + "output_cost_per_token_above_200k_tokens": 2.475e-05, + "cache_creation_input_token_cost_above_200k_tokens": 8.25e-06, + "cache_read_input_token_cost_above_200k_tokens": 6.6e-07, + 
"litellm_provider": "bedrock_converse", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 1.65e-05, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 346, + "supports_native_structured_output": true + }, + "eu.meta.llama3-2-1b-instruct-v1:0": { + "input_cost_per_token": 1.3e-07, + "litellm_provider": "bedrock", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 1.3e-07, + "supports_function_calling": true, + "supports_tool_choice": false + }, + "eu.meta.llama3-2-3b-instruct-v1:0": { + "input_cost_per_token": 1.9e-07, + "litellm_provider": "bedrock", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 1.9e-07, + "supports_function_calling": true, + "supports_tool_choice": false + }, + "eu.mistral.pixtral-large-2502-v1:0": { + "input_cost_per_token": 2e-06, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 6e-06, + "supports_function_calling": true, + "supports_tool_choice": false + }, + "fal_ai/bria/text-to-image/3.2": { + "litellm_provider": "fal_ai", + "mode": "image_generation", + "output_cost_per_image": 0.0398, + "supported_endpoints": [ + "/v1/images/generations" + ] + }, + "fal_ai/fal-ai/flux-pro/v1.1": { + "litellm_provider": "fal_ai", + "mode": "image_generation", + "output_cost_per_image": 
0.04, + "supported_endpoints": [ + "/v1/images/generations" + ] + }, + "fal_ai/fal-ai/flux-pro/v1.1-ultra": { + "litellm_provider": "fal_ai", + "mode": "image_generation", + "output_cost_per_image": 0.06, + "supported_endpoints": [ + "/v1/images/generations" + ] + }, + "fal_ai/fal-ai/flux/schnell": { + "litellm_provider": "fal_ai", + "mode": "image_generation", + "output_cost_per_image": 0.003, + "supported_endpoints": [ + "/v1/images/generations" + ] + }, + "fal_ai/fal-ai/bytedance/seedream/v3/text-to-image": { + "litellm_provider": "fal_ai", + "mode": "image_generation", + "output_cost_per_image": 0.03, + "supported_endpoints": [ + "/v1/images/generations" + ] + }, + "fal_ai/fal-ai/bytedance/dreamina/v3.1/text-to-image": { + "litellm_provider": "fal_ai", + "mode": "image_generation", + "output_cost_per_image": 0.03, + "supported_endpoints": [ + "/v1/images/generations" + ] + }, + "fal_ai/fal-ai/ideogram/v3": { + "litellm_provider": "fal_ai", + "mode": "image_generation", + "output_cost_per_image": 0.06, + "supported_endpoints": [ + "/v1/images/generations" + ] + }, + "fal_ai/fal-ai/imagen4/preview": { + "litellm_provider": "fal_ai", + "mode": "image_generation", + "output_cost_per_image": 0.0398, + "supported_endpoints": [ + "/v1/images/generations" + ] + }, + "fal_ai/fal-ai/imagen4/preview/fast": { + "litellm_provider": "fal_ai", + "mode": "image_generation", + "output_cost_per_image": 0.02, + "supported_endpoints": [ + "/v1/images/generations" + ] + }, + "fal_ai/fal-ai/imagen4/preview/ultra": { + "litellm_provider": "fal_ai", + "mode": "image_generation", + "output_cost_per_image": 0.06, + "supported_endpoints": [ + "/v1/images/generations" + ] + }, + "fal_ai/fal-ai/recraft/v3/text-to-image": { + "litellm_provider": "fal_ai", + "mode": "image_generation", + "output_cost_per_image": 0.0398, + "supported_endpoints": [ + "/v1/images/generations" + ] + }, + "fal_ai/fal-ai/stable-diffusion-v35-medium": { + "litellm_provider": "fal_ai", + "mode": "image_generation", 
+ "output_cost_per_image": 0.0398, + "supported_endpoints": [ + "/v1/images/generations" + ] + }, + "featherless_ai/featherless-ai/Qwerky-72B": { + "litellm_provider": "featherless_ai", + "max_input_tokens": 32768, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat" + }, + "featherless_ai/featherless-ai/Qwerky-QwQ-32B": { + "litellm_provider": "featherless_ai", + "max_input_tokens": 32768, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat" + }, + "fireworks-ai-4.1b-to-16b": { + "input_cost_per_token": 2e-07, + "litellm_provider": "fireworks_ai", + "output_cost_per_token": 2e-07 + }, + "fireworks-ai-56b-to-176b": { + "input_cost_per_token": 1.2e-06, + "litellm_provider": "fireworks_ai", + "output_cost_per_token": 1.2e-06 + }, + "fireworks-ai-above-16b": { + "input_cost_per_token": 9e-07, + "litellm_provider": "fireworks_ai", + "output_cost_per_token": 9e-07 + }, + "fireworks-ai-default": { + "input_cost_per_token": 0.0, + "litellm_provider": "fireworks_ai", + "output_cost_per_token": 0.0 + }, + "fireworks-ai-embedding-150m-to-350m": { + "input_cost_per_token": 1.6e-08, + "litellm_provider": "fireworks_ai-embedding-models", + "output_cost_per_token": 0.0 + }, + "fireworks-ai-embedding-up-to-150m": { + "input_cost_per_token": 8e-09, + "litellm_provider": "fireworks_ai-embedding-models", + "output_cost_per_token": 0.0 + }, + "fireworks-ai-moe-up-to-56b": { + "input_cost_per_token": 5e-07, + "litellm_provider": "fireworks_ai", + "output_cost_per_token": 5e-07 + }, + "fireworks-ai-up-to-4b": { + "input_cost_per_token": 2e-07, + "litellm_provider": "fireworks_ai", + "output_cost_per_token": 2e-07 + }, + "fireworks_ai/WhereIsAI/UAE-Large-V1": { + "input_cost_per_token": 1.6e-08, + "litellm_provider": "fireworks_ai-embedding-models", + "max_input_tokens": 512, + "max_tokens": 512, + "mode": "embedding", + "output_cost_per_token": 0.0, + "source": "https://fireworks.ai/pricing" + }, + 
"fireworks_ai/accounts/fireworks/models/deepseek-coder-v2-instruct": { + "input_cost_per_token": 1.2e-06, + "litellm_provider": "fireworks_ai", + "max_input_tokens": 65536, + "max_output_tokens": 65536, + "max_tokens": 65536, + "mode": "chat", + "output_cost_per_token": 1.2e-06, + "source": "https://fireworks.ai/pricing", + "supports_function_calling": false, + "supports_response_schema": true, + "supports_tool_choice": false + }, + "fireworks_ai/accounts/fireworks/models/deepseek-r1": { + "input_cost_per_token": 3e-06, + "litellm_provider": "fireworks_ai", + "max_input_tokens": 128000, + "max_output_tokens": 20480, + "max_tokens": 20480, + "mode": "chat", + "output_cost_per_token": 8e-06, + "source": "https://fireworks.ai/pricing", + "supports_response_schema": true, + "supports_tool_choice": false + }, + "fireworks_ai/accounts/fireworks/models/deepseek-r1-0528": { + "input_cost_per_token": 3e-06, + "litellm_provider": "fireworks_ai", + "max_input_tokens": 160000, + "max_output_tokens": 160000, + "max_tokens": 160000, + "mode": "chat", + "output_cost_per_token": 8e-06, + "source": "https://fireworks.ai/pricing", + "supports_response_schema": true, + "supports_tool_choice": false + }, + "fireworks_ai/accounts/fireworks/models/deepseek-r1-basic": { + "input_cost_per_token": 5.5e-07, + "litellm_provider": "fireworks_ai", + "max_input_tokens": 128000, + "max_output_tokens": 20480, + "max_tokens": 20480, + "mode": "chat", + "output_cost_per_token": 2.19e-06, + "source": "https://fireworks.ai/pricing", + "supports_response_schema": true, + "supports_tool_choice": false + }, + "fireworks_ai/accounts/fireworks/models/deepseek-v3": { + "input_cost_per_token": 9e-07, + "litellm_provider": "fireworks_ai", + "max_input_tokens": 128000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 9e-07, + "source": "https://fireworks.ai/pricing", + "supports_response_schema": true, + "supports_tool_choice": false + }, + 
"fireworks_ai/accounts/fireworks/models/deepseek-v3-0324": { + "input_cost_per_token": 9e-07, + "litellm_provider": "fireworks_ai", + "max_input_tokens": 163840, + "max_output_tokens": 163840, + "max_tokens": 163840, + "mode": "chat", + "output_cost_per_token": 9e-07, + "source": "https://fireworks.ai/models/fireworks/deepseek-v3-0324", + "supports_response_schema": true, + "supports_tool_choice": false + }, + "fireworks_ai/accounts/fireworks/models/deepseek-v3p1": { + "input_cost_per_token": 5.6e-07, + "litellm_provider": "fireworks_ai", + "max_input_tokens": 128000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 1.68e-06, + "source": "https://fireworks.ai/pricing", + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "fireworks_ai/accounts/fireworks/models/deepseek-v3p1-terminus": { + "input_cost_per_token": 5.6e-07, + "litellm_provider": "fireworks_ai", + "max_input_tokens": 128000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 1.68e-06, + "source": "https://fireworks.ai/pricing", + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "fireworks_ai/accounts/fireworks/models/deepseek-v3p2": { + "input_cost_per_token": 5.6e-07, + "litellm_provider": "fireworks_ai", + "max_input_tokens": 163840, + "max_output_tokens": 163840, + "max_tokens": 163840, + "mode": "chat", + "output_cost_per_token": 1.68e-06, + "source": "https://fireworks.ai/models/fireworks/deepseek-v3p2", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "fireworks_ai/accounts/fireworks/models/firefunction-v2": { + "input_cost_per_token": 9e-07, + "litellm_provider": "fireworks_ai", + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 9e-07, + 
"source": "https://fireworks.ai/pricing", + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "fireworks_ai/accounts/fireworks/models/glm-4p5": { + "input_cost_per_token": 5.5e-07, + "litellm_provider": "fireworks_ai", + "max_input_tokens": 128000, + "max_output_tokens": 96000, + "max_tokens": 96000, + "mode": "chat", + "output_cost_per_token": 2.19e-06, + "source": "https://fireworks.ai/models/fireworks/glm-4p5", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "fireworks_ai/accounts/fireworks/models/glm-4p5-air": { + "input_cost_per_token": 2.2e-07, + "litellm_provider": "fireworks_ai", + "max_input_tokens": 128000, + "max_output_tokens": 96000, + "max_tokens": 96000, + "mode": "chat", + "output_cost_per_token": 8.8e-07, + "source": "https://artificialanalysis.ai/models/glm-4-5-air", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "fireworks_ai/accounts/fireworks/models/glm-4p6": { + "input_cost_per_token": 5.5e-07, + "output_cost_per_token": 2.19e-06, + "litellm_provider": "fireworks_ai", + "max_input_tokens": 202800, + "max_output_tokens": 202800, + "max_tokens": 202800, + "mode": "chat", + "source": "https://fireworks.ai/pricing", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "fireworks_ai/accounts/fireworks/models/glm-4p7": { + "cache_read_input_token_cost": 3e-07, + "input_cost_per_token": 6e-07, + "litellm_provider": "fireworks_ai", + "max_input_tokens": 202800, + "max_output_tokens": 202800, + "max_tokens": 202800, + "mode": "chat", + "output_cost_per_token": 2.2e-06, + "source": "https://fireworks.ai/models/fireworks/glm-4p7", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_response_schema": 
true, + "supports_tool_choice": true + }, + "fireworks_ai/accounts/fireworks/models/gpt-oss-120b": { + "input_cost_per_token": 1.5e-07, + "litellm_provider": "fireworks_ai", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 6e-07, + "source": "https://fireworks.ai/pricing", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "fireworks_ai/accounts/fireworks/models/gpt-oss-20b": { + "input_cost_per_token": 5e-08, + "litellm_provider": "fireworks_ai", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 2e-07, + "source": "https://fireworks.ai/pricing", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct": { + "input_cost_per_token": 6e-07, + "litellm_provider": "fireworks_ai", + "max_input_tokens": 131072, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 2.5e-06, + "source": "https://fireworks.ai/models/fireworks/kimi-k2-instruct", + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct-0905": { + "input_cost_per_token": 6e-07, + "litellm_provider": "fireworks_ai", + "max_input_tokens": 262144, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 2.5e-06, + "source": "https://app.fireworks.ai/models/fireworks/kimi-k2-instruct-0905", + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "fireworks_ai/accounts/fireworks/models/kimi-k2-thinking": { + "input_cost_per_token": 6e-07, + "litellm_provider": "fireworks_ai", + 
"max_input_tokens": 262144, + "max_output_tokens": 262144, + "max_tokens": 262144, + "mode": "chat", + "output_cost_per_token": 2.5e-06, + "source": "https://fireworks.ai/pricing", + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_web_search": true + }, + "fireworks_ai/accounts/fireworks/models/kimi-k2p5": { + "cache_read_input_token_cost": 1e-07, + "input_cost_per_token": 6e-07, + "litellm_provider": "fireworks_ai", + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "max_tokens": 262144, + "mode": "chat", + "output_cost_per_token": 3e-06, + "source": "https://fireworks.ai/pricing", + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "fireworks_ai/accounts/fireworks/models/llama-v3p1-405b-instruct": { + "input_cost_per_token": 3e-06, + "litellm_provider": "fireworks_ai", + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 3e-06, + "source": "https://fireworks.ai/pricing", + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "fireworks_ai/accounts/fireworks/models/llama-v3p1-8b-instruct": { + "input_cost_per_token": 1e-07, + "litellm_provider": "fireworks_ai", + "max_input_tokens": 16384, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 1e-07, + "source": "https://fireworks.ai/pricing", + "supports_function_calling": false, + "supports_response_schema": true, + "supports_tool_choice": false + }, + "fireworks_ai/accounts/fireworks/models/llama-v3p2-11b-vision-instruct": { + "input_cost_per_token": 2e-07, + "litellm_provider": "fireworks_ai", + "max_input_tokens": 16384, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 2e-07, + "source": "https://fireworks.ai/pricing", + "supports_function_calling": 
false, + "supports_response_schema": true, + "supports_tool_choice": false, + "supports_vision": true + }, + "fireworks_ai/accounts/fireworks/models/llama-v3p2-1b-instruct": { + "input_cost_per_token": 1e-07, + "litellm_provider": "fireworks_ai", + "max_input_tokens": 16384, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 1e-07, + "source": "https://fireworks.ai/pricing", + "supports_function_calling": false, + "supports_response_schema": true, + "supports_tool_choice": false + }, + "fireworks_ai/accounts/fireworks/models/llama-v3p2-3b-instruct": { + "input_cost_per_token": 1e-07, + "litellm_provider": "fireworks_ai", + "max_input_tokens": 16384, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 1e-07, + "source": "https://fireworks.ai/pricing", + "supports_function_calling": false, + "supports_response_schema": true, + "supports_tool_choice": false + }, + "fireworks_ai/accounts/fireworks/models/llama-v3p2-90b-vision-instruct": { + "input_cost_per_token": 9e-07, + "litellm_provider": "fireworks_ai", + "max_input_tokens": 16384, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 9e-07, + "source": "https://fireworks.ai/pricing", + "supports_response_schema": true, + "supports_tool_choice": false, + "supports_vision": true + }, + "fireworks_ai/accounts/fireworks/models/llama4-maverick-instruct-basic": { + "input_cost_per_token": 2.2e-07, + "litellm_provider": "fireworks_ai", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 8.8e-07, + "source": "https://fireworks.ai/pricing", + "supports_response_schema": true, + "supports_tool_choice": false + }, + "fireworks_ai/accounts/fireworks/models/llama4-scout-instruct-basic": { + "input_cost_per_token": 1.5e-07, + "litellm_provider": "fireworks_ai", + "max_input_tokens": 131072, + "max_output_tokens": 
131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 6e-07, + "source": "https://fireworks.ai/pricing", + "supports_response_schema": true, + "supports_tool_choice": false + }, + "fireworks_ai/accounts/fireworks/models/minimax-m2p1": { + "cache_read_input_token_cost": 3e-08, + "input_cost_per_token": 3e-07, + "litellm_provider": "fireworks_ai", + "max_input_tokens": 204800, + "max_output_tokens": 204800, + "max_tokens": 204800, + "mode": "chat", + "output_cost_per_token": 1.2e-06, + "source": "https://fireworks.ai/models/fireworks/minimax-m2p1", + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "fireworks_ai/accounts/fireworks/models/mixtral-8x22b-instruct-hf": { + "input_cost_per_token": 1.2e-06, + "litellm_provider": "fireworks_ai", + "max_input_tokens": 65536, + "max_output_tokens": 65536, + "max_tokens": 65536, + "mode": "chat", + "output_cost_per_token": 1.2e-06, + "source": "https://fireworks.ai/pricing", + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "fireworks_ai/accounts/fireworks/models/qwen2-72b-instruct": { + "input_cost_per_token": 9e-07, + "litellm_provider": "fireworks_ai", + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 9e-07, + "source": "https://fireworks.ai/pricing", + "supports_function_calling": false, + "supports_response_schema": true, + "supports_tool_choice": false + }, + "fireworks_ai/accounts/fireworks/models/qwen2p5-coder-32b-instruct": { + "input_cost_per_token": 9e-07, + "litellm_provider": "fireworks_ai", + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 9e-07, + "source": "https://fireworks.ai/pricing", + "supports_function_calling": false, + "supports_response_schema": true, + "supports_tool_choice": false + }, + 
"fireworks_ai/accounts/fireworks/models/yi-large": { + "input_cost_per_token": 3e-06, + "litellm_provider": "fireworks_ai", + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 3e-06, + "source": "https://fireworks.ai/pricing", + "supports_function_calling": false, + "supports_response_schema": true, + "supports_tool_choice": false + }, + "fireworks_ai/glm-4p7": { + "cache_read_input_token_cost": 3e-07, + "input_cost_per_token": 6e-07, + "litellm_provider": "fireworks_ai", + "max_input_tokens": 202800, + "max_output_tokens": 202800, + "max_tokens": 202800, + "mode": "chat", + "output_cost_per_token": 2.2e-06, + "source": "https://fireworks.ai/models/fireworks/glm-4p7", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "fireworks_ai/kimi-k2p5": { + "cache_read_input_token_cost": 1e-07, + "input_cost_per_token": 6e-07, + "litellm_provider": "fireworks_ai", + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "max_tokens": 262144, + "mode": "chat", + "output_cost_per_token": 3e-06, + "source": "https://fireworks.ai/pricing", + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "fireworks_ai/minimax-m2p1": { + "cache_read_input_token_cost": 3e-08, + "input_cost_per_token": 3e-07, + "litellm_provider": "fireworks_ai", + "max_input_tokens": 204800, + "max_output_tokens": 204800, + "max_tokens": 204800, + "mode": "chat", + "output_cost_per_token": 1.2e-06, + "source": "https://fireworks.ai/models/fireworks/minimax-m2p1", + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "fireworks_ai/nomic-ai/nomic-embed-text-v1": { + "input_cost_per_token": 8e-09, + "litellm_provider": "fireworks_ai-embedding-models", + "max_input_tokens": 8192, + "max_tokens": 8192, + "mode": "embedding", + 
"output_cost_per_token": 0.0, + "source": "https://fireworks.ai/pricing" + }, + "fireworks_ai/nomic-ai/nomic-embed-text-v1.5": { + "input_cost_per_token": 8e-09, + "litellm_provider": "fireworks_ai-embedding-models", + "max_input_tokens": 8192, + "max_tokens": 8192, + "mode": "embedding", + "output_cost_per_token": 0.0, + "source": "https://fireworks.ai/pricing" + }, + "fireworks_ai/thenlper/gte-base": { + "input_cost_per_token": 8e-09, + "litellm_provider": "fireworks_ai-embedding-models", + "max_input_tokens": 512, + "max_tokens": 512, + "mode": "embedding", + "output_cost_per_token": 0.0, + "source": "https://fireworks.ai/pricing" + }, + "fireworks_ai/thenlper/gte-large": { + "input_cost_per_token": 1.6e-08, + "litellm_provider": "fireworks_ai-embedding-models", + "max_input_tokens": 512, + "max_tokens": 512, + "mode": "embedding", + "output_cost_per_token": 0.0, + "source": "https://fireworks.ai/pricing" + }, + "friendliai/meta-llama-3.1-70b-instruct": { + "input_cost_per_token": 6e-07, + "litellm_provider": "friendliai", + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 6e-07, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "friendliai/meta-llama-3.1-8b-instruct": { + "input_cost_per_token": 1e-07, + "litellm_provider": "friendliai", + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 1e-07, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "ft:babbage-002": { + "input_cost_per_token": 1.6e-06, + "input_cost_per_token_batches": 2e-07, + "litellm_provider": "text-completion-openai", + "max_input_tokens": 16384, + "max_output_tokens": 
4096, + "max_tokens": 4096, + "mode": "completion", + "output_cost_per_token": 1.6e-06, + "output_cost_per_token_batches": 2e-07 + }, + "ft:davinci-002": { + "input_cost_per_token": 1.2e-05, + "input_cost_per_token_batches": 1e-06, + "litellm_provider": "text-completion-openai", + "max_input_tokens": 16384, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "completion", + "output_cost_per_token": 1.2e-05, + "output_cost_per_token_batches": 1e-06 + }, + "ft:gpt-3.5-turbo": { + "input_cost_per_token": 3e-06, + "input_cost_per_token_batches": 1.5e-06, + "litellm_provider": "openai", + "max_input_tokens": 16385, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 6e-06, + "output_cost_per_token_batches": 3e-06, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "ft:gpt-3.5-turbo-0125": { + "input_cost_per_token": 3e-06, + "litellm_provider": "openai", + "max_input_tokens": 16385, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 6e-06, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "ft:gpt-3.5-turbo-0613": { + "input_cost_per_token": 3e-06, + "litellm_provider": "openai", + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 6e-06, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "ft:gpt-3.5-turbo-1106": { + "input_cost_per_token": 3e-06, + "litellm_provider": "openai", + "max_input_tokens": 16385, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 6e-06, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "ft:gpt-4-0613": { + "input_cost_per_token": 3e-05, + "litellm_provider": "openai", + "max_input_tokens": 8192, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 6e-05, + "source": "OpenAI needs to add pricing for 
this ft model, will be updated when added by OpenAI. Defaulting to base model pricing", + "supports_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "ft:gpt-4o-2024-08-06": { + "cache_read_input_token_cost": 1.875e-06, + "input_cost_per_token": 3.75e-06, + "input_cost_per_token_batches": 1.875e-06, + "litellm_provider": "openai", + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 1.5e-05, + "output_cost_per_token_batches": 7.5e-06, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "ft:gpt-4o-2024-11-20": { + "cache_creation_input_token_cost": 1.875e-06, + "input_cost_per_token": 3.75e-06, + "litellm_provider": "openai", + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 1.5e-05, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "ft:gpt-4o-mini-2024-07-18": { + "cache_read_input_token_cost": 1.5e-07, + "input_cost_per_token": 3e-07, + "input_cost_per_token_batches": 1.5e-07, + "litellm_provider": "openai", + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 1.2e-06, + "output_cost_per_token_batches": 6e-07, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true + 
}, + "ft:gpt-4.1-2025-04-14": { + "cache_read_input_token_cost": 7.5e-07, + "input_cost_per_token": 3e-06, + "input_cost_per_token_batches": 1.5e-06, + "litellm_provider": "openai", + "max_input_tokens": 1047576, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 1.2e-05, + "output_cost_per_token_batches": 6e-06, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "ft:gpt-4.1-mini-2025-04-14": { + "cache_read_input_token_cost": 2e-07, + "input_cost_per_token": 8e-07, + "input_cost_per_token_batches": 4e-07, + "litellm_provider": "openai", + "max_input_tokens": 1047576, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 3.2e-06, + "output_cost_per_token_batches": 1.6e-06, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "ft:gpt-4.1-nano-2025-04-14": { + "cache_read_input_token_cost": 5e-08, + "input_cost_per_token": 2e-07, + "input_cost_per_token_batches": 1e-07, + "litellm_provider": "openai", + "max_input_tokens": 1047576, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 8e-07, + "output_cost_per_token_batches": 4e-07, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "ft:o4-mini-2025-04-16": { + "cache_read_input_token_cost": 1e-06, + "input_cost_per_token": 4e-06, + "input_cost_per_token_batches": 2e-06, + "litellm_provider": "openai", + "max_input_tokens": 200000, + "max_output_tokens": 100000, + 
"max_tokens": 100000, + "mode": "chat", + "output_cost_per_token": 1.6e-05, + "output_cost_per_token_batches": 8e-06, + "supports_function_calling": true, + "supports_parallel_function_calling": false, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "gemini-2.0-flash": { + "cache_read_input_token_cost": 2.5e-08, + "deprecation_date": "2026-06-01", + "input_cost_per_audio_token": 7e-07, + "input_cost_per_token": 1e-07, + "litellm_provider": "vertex_ai-language-models", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 8192, + "max_pdf_size_mb": 30, + "max_tokens": 8192, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_token": 4e-07, + "source": "https://ai.google.dev/pricing#2_0flash", + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text", + "image" + ], + "supports_audio_input": true, + "supports_audio_output": true, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_url_context": true, + "supports_vision": true, + "supports_web_search": true, + "search_context_cost_per_query": { + "search_context_size_low": 0.035, + "search_context_size_medium": 0.035, + "search_context_size_high": 0.035 + } + }, + "gemini-2.0-flash-001": { + "cache_read_input_token_cost": 3.75e-08, + "deprecation_date": "2026-06-01", + "input_cost_per_audio_token": 1e-06, + "input_cost_per_token": 1.5e-07, + "litellm_provider": "vertex_ai-language-models", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 8192, + 
"max_pdf_size_mb": 30, + "max_tokens": 8192, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_token": 6e-07, + "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing", + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text", + "image" + ], + "supports_audio_output": true, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_web_search": true, + "search_context_cost_per_query": { + "search_context_size_low": 0.035, + "search_context_size_medium": 0.035, + "search_context_size_high": 0.035 + } + }, + "gemini-2.0-flash-lite": { + "cache_read_input_token_cost": 1.875e-08, + "deprecation_date": "2026-06-01", + "input_cost_per_audio_token": 7.5e-08, + "input_cost_per_token": 7.5e-08, + "litellm_provider": "vertex_ai-language-models", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 8192, + "max_pdf_size_mb": 50, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_token": 3e-07, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash", + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], + "supports_audio_output": true, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_web_search": true, + "search_context_cost_per_query": { + "search_context_size_low": 0.035, + "search_context_size_medium": 
0.035, + "search_context_size_high": 0.035 + } + }, + "gemini-2.0-flash-lite-001": { + "cache_read_input_token_cost": 1.875e-08, + "deprecation_date": "2026-06-01", + "input_cost_per_audio_token": 7.5e-08, + "input_cost_per_token": 7.5e-08, + "litellm_provider": "vertex_ai-language-models", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 8192, + "max_pdf_size_mb": 50, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_token": 3e-07, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash", + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], + "supports_audio_output": true, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_web_search": true, + "search_context_cost_per_query": { + "search_context_size_low": 0.035, + "search_context_size_medium": 0.035, + "search_context_size_high": 0.035 + } + }, + "gemini-2.5-flash": { + "cache_read_input_token_cost": 3e-08, + "input_cost_per_audio_token": 1e-06, + "input_cost_per_token": 3e-07, + "litellm_provider": "vertex_ai-language-models", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 65535, + "max_pdf_size_mb": 30, + "max_tokens": 65535, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_reasoning_token": 2.5e-06, + "output_cost_per_token": 2.5e-06, + "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview", + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + 
"supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], + "supports_audio_output": false, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_url_context": true, + "supports_vision": true, + "supports_web_search": true, + "search_context_cost_per_query": { + "search_context_size_low": 0.035, + "search_context_size_medium": 0.035, + "search_context_size_high": 0.035 + }, + "supports_service_tier": true + }, + "gemini-2.5-flash-image": { + "cache_read_input_token_cost": 3e-08, + "input_cost_per_audio_token": 1e-06, + "input_cost_per_token": 3e-07, + "litellm_provider": "vertex_ai-language-models", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "max_tokens": 32768, + "max_pdf_size_mb": 30, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "image_generation", + "output_cost_per_image": 0.039, + "output_cost_per_image_token": 3e-05, + "output_cost_per_reasoning_token": 2.5e-06, + "output_cost_per_token": 2.5e-06, + "rpm": 100000, + "source": "https://ai.google.dev/gemini-api/docs/pricing#gemini-2.5-flash-image", + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text", + "image" + ], + "supports_audio_output": false, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_url_context": 
true, + "supports_vision": true, + "supports_web_search": false, + "tpm": 8000000, + "supports_service_tier": true + }, + "gemini-3-pro-image-preview": { + "input_cost_per_image": 0.0011, + "input_cost_per_token": 2e-06, + "input_cost_per_token_batches": 1e-06, + "litellm_provider": "vertex_ai-language-models", + "max_input_tokens": 65536, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "image_generation", + "output_cost_per_image": 0.134, + "output_cost_per_image_token": 0.00012, + "output_cost_per_token": 1.2e-05, + "output_cost_per_token_batches": 6e-06, + "source": "https://ai.google.dev/gemini-api/docs/pricing", + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text", + "image" + ], + "supports_function_calling": false, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_vision": true, + "supports_web_search": true, + "search_context_cost_per_query": { + "search_context_size_low": 0.014, + "search_context_size_medium": 0.014, + "search_context_size_high": 0.014 + }, + "web_search_billing_unit": "per_query", + "supports_service_tier": true + }, + "gemini-3.1-flash-image-preview": { + "input_cost_per_image": 0.00056, + "input_cost_per_token": 5e-07, + "litellm_provider": "vertex_ai-language-models", + "max_input_tokens": 65536, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "image_generation", + "output_cost_per_image": 0.0672, + "output_cost_per_image_token": 6e-05, + "output_cost_per_token": 3e-06, + "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#gemini-models", + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text", + "image" + ], + "supports_function_calling": false, + 
"supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_vision": true, + "supports_web_search": true, + "search_context_cost_per_query": { + "search_context_size_low": 0.014, + "search_context_size_medium": 0.014, + "search_context_size_high": 0.014 + }, + "web_search_billing_unit": "per_query" + }, + "gemini-3.1-flash-lite-preview": { + "cache_read_input_token_cost": 2.5e-08, + "cache_read_input_token_cost_per_audio_token": 5e-08, + "input_cost_per_audio_token": 5e-07, + "input_cost_per_token": 2.5e-07, + "litellm_provider": "vertex_ai-language-models", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 65536, + "max_pdf_size_mb": 30, + "max_tokens": 65536, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_reasoning_token": 1.5e-06, + "output_cost_per_token": 1.5e-06, + "source": "https://ai.google.dev/gemini-api/docs/models", + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], + "supports_audio_input": true, + "supports_audio_output": false, + "supports_code_execution": true, + "supports_file_search": true, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_url_context": true, + "supports_video_input": true, + "supports_vision": true, + "supports_web_search": true, + "supports_native_streaming": true, + "search_context_cost_per_query": { + "search_context_size_low": 0.014, + "search_context_size_medium": 0.014, + "search_context_size_high": 0.014 + }, + 
"web_search_billing_unit": "per_query", + "supports_service_tier": true + }, + "deep-research-pro-preview-12-2025": { + "input_cost_per_image": 0.0011, + "input_cost_per_token": 2e-06, + "input_cost_per_token_batches": 1e-06, + "litellm_provider": "vertex_ai-language-models", + "max_input_tokens": 65536, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "image_generation", + "output_cost_per_image": 0.134, + "output_cost_per_image_token": 0.00012, + "output_cost_per_token": 1.2e-05, + "output_cost_per_token_batches": 6e-06, + "source": "https://ai.google.dev/gemini-api/docs/pricing", + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text", + "image" + ], + "supports_function_calling": false, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_vision": true, + "supports_web_search": true + }, + "gemini-2.5-flash-lite": { + "cache_read_input_token_cost": 1e-08, + "input_cost_per_audio_token": 3e-07, + "input_cost_per_token": 1e-07, + "litellm_provider": "vertex_ai-language-models", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 65535, + "max_pdf_size_mb": 30, + "max_tokens": 65535, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_reasoning_token": 4e-07, + "output_cost_per_token": 4e-07, + "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview", + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], + "supports_audio_output": false, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + 
"supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_url_context": true, + "supports_vision": true, + "supports_web_search": true, + "search_context_cost_per_query": { + "search_context_size_low": 0.035, + "search_context_size_medium": 0.035, + "search_context_size_high": 0.035 + }, + "supports_service_tier": true + }, + "gemini-2.5-flash-lite-preview-09-2025": { + "cache_read_input_token_cost": 1e-08, + "input_cost_per_audio_token": 3e-07, + "input_cost_per_token": 1e-07, + "litellm_provider": "vertex_ai-language-models", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 65535, + "max_pdf_size_mb": 30, + "max_tokens": 65535, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_reasoning_token": 4e-07, + "output_cost_per_token": 4e-07, + "source": "https://developers.googleblog.com/en/continuing-to-bring-you-our-latest-models-with-an-improved-gemini-2-5-flash-and-flash-lite-release/", + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], + "supports_audio_output": false, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_url_context": true, + "supports_vision": true, + "supports_web_search": true, + "search_context_cost_per_query": { + "search_context_size_low": 0.035, + "search_context_size_medium": 0.035, + "search_context_size_high": 0.035 + } + }, + 
"gemini-2.5-flash-preview-09-2025": { + "cache_read_input_token_cost": 7.5e-08, + "input_cost_per_audio_token": 1e-06, + "input_cost_per_token": 3e-07, + "litellm_provider": "vertex_ai-language-models", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 65535, + "max_pdf_size_mb": 30, + "max_tokens": 65535, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_reasoning_token": 2.5e-06, + "output_cost_per_token": 2.5e-06, + "source": "https://developers.googleblog.com/en/continuing-to-bring-you-our-latest-models-with-an-improved-gemini-2-5-flash-and-flash-lite-release/", + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], + "supports_audio_output": false, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_url_context": true, + "supports_vision": true, + "supports_web_search": true, + "search_context_cost_per_query": { + "search_context_size_low": 0.035, + "search_context_size_medium": 0.035, + "search_context_size_high": 0.035 + } + }, + "gemini-live-2.5-flash-preview-native-audio-09-2025": { + "cache_read_input_token_cost": 7.5e-08, + "input_cost_per_audio_token": 3e-06, + "input_cost_per_token": 3e-07, + "litellm_provider": "vertex_ai-language-models", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 65535, + "max_pdf_size_mb": 30, + "max_tokens": 65535, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "realtime", 
+ "output_cost_per_audio_token": 1.2e-05, + "output_cost_per_token": 2e-06, + "source": "https://ai.google.dev/gemini-api/docs/pricing", + "supported_endpoints": [ + "/vertex_ai/live" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text", + "audio" + ], + "supports_audio_input": true, + "supports_audio_output": true, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_url_context": true, + "supports_vision": true, + "supports_web_search": true, + "search_context_cost_per_query": { + "search_context_size_low": 0.035, + "search_context_size_medium": 0.035, + "search_context_size_high": 0.035 + } + }, + "gemini/gemini-live-2.5-flash-preview-native-audio-09-2025": { + "cache_read_input_token_cost": 7.5e-08, + "input_cost_per_audio_token": 3e-06, + "input_cost_per_token": 3e-07, + "litellm_provider": "gemini", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 65535, + "max_pdf_size_mb": 30, + "max_tokens": 65535, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "realtime", + "output_cost_per_audio_token": 1.2e-05, + "output_cost_per_token": 2e-06, + "rpm": 100000, + "source": "https://ai.google.dev/gemini-api/docs/pricing", + "supported_endpoints": [ + "/v1/realtime" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text", + "audio" + ], + "supports_audio_input": true, + "supports_audio_output": true, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": 
true, + "supports_tool_choice": true, + "supports_url_context": true, + "supports_vision": true, + "supports_web_search": true, + "tpm": 8000000, + "search_context_cost_per_query": { + "search_context_size_low": 0.035, + "search_context_size_medium": 0.035, + "search_context_size_high": 0.035 + } + }, + "gemini-2.5-flash-lite-preview-06-17": { + "deprecation_date": "2025-11-18", + "cache_read_input_token_cost": 2.5e-08, + "input_cost_per_audio_token": 5e-07, + "input_cost_per_token": 1e-07, + "litellm_provider": "vertex_ai-language-models", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 65535, + "max_pdf_size_mb": 30, + "max_tokens": 65535, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_reasoning_token": 4e-07, + "output_cost_per_token": 4e-07, + "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview", + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], + "supports_audio_output": false, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_url_context": true, + "supports_vision": true, + "supports_web_search": true, + "search_context_cost_per_query": { + "search_context_size_low": 0.035, + "search_context_size_medium": 0.035, + "search_context_size_high": 0.035 + } + }, + "gemini-2.5-pro": { + "cache_read_input_token_cost": 1.25e-07, + "cache_read_input_token_cost_above_200k_tokens": 2.5e-07, + "cache_creation_input_token_cost_above_200k_tokens": 2.5e-07, + "input_cost_per_token": 1.25e-06, + 
"input_cost_per_token_above_200k_tokens": 2.5e-06, + "litellm_provider": "vertex_ai-language-models", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 65535, + "max_pdf_size_mb": 30, + "max_tokens": 65535, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_token": 1e-05, + "output_cost_per_token_above_200k_tokens": 1.5e-05, + "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing", + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], + "supports_audio_input": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_video_input": true, + "supports_vision": true, + "supports_web_search": true, + "search_context_cost_per_query": { + "search_context_size_low": 0.035, + "search_context_size_medium": 0.035, + "search_context_size_high": 0.035 + }, + "supports_service_tier": true + }, + "gemini-3-pro-preview": { + "deprecation_date": "2026-03-26", + "cache_read_input_token_cost": 2e-07, + "cache_read_input_token_cost_above_200k_tokens": 4e-07, + "cache_creation_input_token_cost_above_200k_tokens": 2.5e-07, + "input_cost_per_token": 2e-06, + "input_cost_per_token_above_200k_tokens": 4e-06, + "input_cost_per_token_batches": 1e-06, + "litellm_provider": "vertex_ai-language-models", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 65535, + "max_pdf_size_mb": 30, + "max_tokens": 65535, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_token": 
1.2e-05, + "output_cost_per_token_above_200k_tokens": 1.8e-05, + "output_cost_per_token_batches": 6e-06, + "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing", + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], + "supports_audio_input": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_video_input": true, + "supports_vision": true, + "supports_web_search": true, + "supports_native_streaming": true, + "input_cost_per_token_priority": 3.6e-06, + "input_cost_per_token_above_200k_tokens_priority": 7.2e-06, + "output_cost_per_token_priority": 2.16e-05, + "output_cost_per_token_above_200k_tokens_priority": 3.24e-05, + "cache_read_input_token_cost_priority": 3.6e-07, + "cache_read_input_token_cost_above_200k_tokens_priority": 7.2e-07, + "supports_service_tier": true, + "search_context_cost_per_query": { + "search_context_size_low": 0.014, + "search_context_size_medium": 0.014, + "search_context_size_high": 0.014 + }, + "web_search_billing_unit": "per_query" + }, + "gemini-3.1-pro-preview": { + "cache_read_input_token_cost": 2e-07, + "cache_read_input_token_cost_above_200k_tokens": 4e-07, + "cache_creation_input_token_cost_above_200k_tokens": 2.5e-07, + "input_cost_per_token": 2e-06, + "input_cost_per_token_above_200k_tokens": 4e-06, + "input_cost_per_token_batches": 1e-06, + "litellm_provider": "vertex_ai-language-models", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 65536, + "max_pdf_size_mb": 30, + "max_tokens": 65536, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": 
"chat", + "output_cost_per_token": 1.2e-05, + "output_cost_per_token_above_200k_tokens": 1.8e-05, + "output_cost_per_token_batches": 6e-06, + "output_cost_per_image": 0.00012, + "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#gemini-models", + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], + "supports_audio_input": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_video_input": true, + "supports_vision": true, + "supports_web_search": true, + "supports_url_context": true, + "supports_native_streaming": true, + "input_cost_per_token_priority": 3.6e-06, + "input_cost_per_token_above_200k_tokens_priority": 7.2e-06, + "output_cost_per_token_priority": 2.16e-05, + "output_cost_per_token_above_200k_tokens_priority": 3.24e-05, + "cache_read_input_token_cost_priority": 3.6e-07, + "cache_read_input_token_cost_above_200k_tokens_priority": 7.2e-07, + "supports_service_tier": true, + "search_context_cost_per_query": { + "search_context_size_low": 0.014, + "search_context_size_medium": 0.014, + "search_context_size_high": 0.014 + }, + "web_search_billing_unit": "per_query" + }, + "gemini-3.1-pro-preview-customtools": { + "cache_read_input_token_cost": 2e-07, + "cache_read_input_token_cost_above_200k_tokens": 4e-07, + "cache_creation_input_token_cost_above_200k_tokens": 2.5e-07, + "input_cost_per_token": 2e-06, + "input_cost_per_token_above_200k_tokens": 4e-06, + "input_cost_per_token_batches": 1e-06, + "litellm_provider": "vertex_ai-language-models", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + 
"max_output_tokens": 65536, + "max_pdf_size_mb": 30, + "max_tokens": 65536, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_token": 1.2e-05, + "output_cost_per_token_above_200k_tokens": 1.8e-05, + "output_cost_per_token_batches": 6e-06, + "output_cost_per_image": 0.00012, + "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#gemini-models", + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], + "supports_audio_input": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_video_input": true, + "supports_vision": true, + "supports_web_search": true, + "supports_url_context": true, + "supports_native_streaming": true, + "search_context_cost_per_query": { + "search_context_size_low": 0.014, + "search_context_size_medium": 0.014, + "search_context_size_high": 0.014 + }, + "web_search_billing_unit": "per_query" + }, + "vertex_ai/gemini-3-pro-preview": { + "cache_read_input_token_cost": 2e-07, + "cache_read_input_token_cost_above_200k_tokens": 4e-07, + "cache_creation_input_token_cost_above_200k_tokens": 2.5e-07, + "input_cost_per_token": 2e-06, + "input_cost_per_token_above_200k_tokens": 4e-06, + "input_cost_per_token_batches": 1e-06, + "litellm_provider": "vertex_ai", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 65535, + "max_pdf_size_mb": 30, + "max_tokens": 65535, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_token": 1.2e-05, + "output_cost_per_token_above_200k_tokens": 1.8e-05, + 
"output_cost_per_token_batches": 6e-06, + "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing", + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], + "supports_audio_input": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_video_input": true, + "supports_vision": true, + "supports_web_search": true, + "supports_native_streaming": true, + "input_cost_per_token_priority": 3.6e-06, + "input_cost_per_token_above_200k_tokens_priority": 7.2e-06, + "output_cost_per_token_priority": 2.16e-05, + "output_cost_per_token_above_200k_tokens_priority": 3.24e-05, + "cache_read_input_token_cost_priority": 3.6e-07, + "cache_read_input_token_cost_above_200k_tokens_priority": 7.2e-07, + "supports_service_tier": true, + "search_context_cost_per_query": { + "search_context_size_low": 0.014, + "search_context_size_medium": 0.014, + "search_context_size_high": 0.014 + }, + "web_search_billing_unit": "per_query" + }, + "vertex_ai/gemini-3-flash-preview": { + "cache_read_input_token_cost": 5e-08, + "input_cost_per_token": 5e-07, + "input_cost_per_audio_token": 1e-06, + "litellm_provider": "vertex_ai", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 65535, + "max_pdf_size_mb": 30, + "max_tokens": 65535, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_token": 3e-06, + "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing", + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": [ + "text", + 
"image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], + "supports_audio_input": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_video_input": true, + "supports_vision": true, + "supports_web_search": true, + "supports_native_streaming": true, + "input_cost_per_token_priority": 9e-07, + "input_cost_per_audio_token_priority": 1.8e-06, + "output_cost_per_token_priority": 5.4e-06, + "cache_read_input_token_cost_priority": 9e-08, + "supports_service_tier": true, + "search_context_cost_per_query": { + "search_context_size_low": 0.014, + "search_context_size_medium": 0.014, + "search_context_size_high": 0.014 + }, + "web_search_billing_unit": "per_query" + }, + "vertex_ai/gemini-3.1-pro-preview": { + "cache_read_input_token_cost": 2e-07, + "cache_read_input_token_cost_above_200k_tokens": 4e-07, + "cache_creation_input_token_cost_above_200k_tokens": 2.5e-07, + "input_cost_per_token": 2e-06, + "input_cost_per_token_above_200k_tokens": 4e-06, + "input_cost_per_token_batches": 1e-06, + "litellm_provider": "vertex_ai", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 65536, + "max_pdf_size_mb": 30, + "max_tokens": 65536, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_token": 1.2e-05, + "output_cost_per_token_above_200k_tokens": 1.8e-05, + "output_cost_per_token_batches": 6e-06, + "output_cost_per_image": 0.00012, + "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#gemini-models", + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + 
"text" + ], + "supports_audio_input": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_video_input": true, + "supports_vision": true, + "supports_web_search": true, + "supports_url_context": true, + "supports_native_streaming": true, + "input_cost_per_token_priority": 3.6e-06, + "input_cost_per_token_above_200k_tokens_priority": 7.2e-06, + "output_cost_per_token_priority": 2.16e-05, + "output_cost_per_token_above_200k_tokens_priority": 3.24e-05, + "cache_read_input_token_cost_priority": 3.6e-07, + "cache_read_input_token_cost_above_200k_tokens_priority": 7.2e-07, + "supports_service_tier": true, + "search_context_cost_per_query": { + "search_context_size_low": 0.014, + "search_context_size_medium": 0.014, + "search_context_size_high": 0.014 + }, + "web_search_billing_unit": "per_query" + }, + "vertex_ai/gemini-3.1-pro-preview-customtools": { + "cache_read_input_token_cost": 2e-07, + "cache_read_input_token_cost_above_200k_tokens": 4e-07, + "cache_creation_input_token_cost_above_200k_tokens": 2.5e-07, + "input_cost_per_token": 2e-06, + "input_cost_per_token_above_200k_tokens": 4e-06, + "input_cost_per_token_batches": 1e-06, + "litellm_provider": "vertex_ai", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 65536, + "max_pdf_size_mb": 30, + "max_tokens": 65536, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_token": 1.2e-05, + "output_cost_per_token_above_200k_tokens": 1.8e-05, + "output_cost_per_token_batches": 6e-06, + "output_cost_per_image": 0.00012, + "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#gemini-models", + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + 
"/v1/batch" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], + "supports_audio_input": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_video_input": true, + "supports_vision": true, + "supports_web_search": true, + "supports_url_context": true, + "supports_native_streaming": true, + "input_cost_per_token_priority": 3.6e-06, + "input_cost_per_token_above_200k_tokens_priority": 7.2e-06, + "output_cost_per_token_priority": 2.16e-05, + "output_cost_per_token_above_200k_tokens_priority": 3.24e-05, + "cache_read_input_token_cost_priority": 3.6e-07, + "cache_read_input_token_cost_above_200k_tokens_priority": 7.2e-07, + "supports_service_tier": true, + "search_context_cost_per_query": { + "search_context_size_low": 0.014, + "search_context_size_medium": 0.014, + "search_context_size_high": 0.014 + }, + "web_search_billing_unit": "per_query" + }, + "gemini-2.5-pro-preview-tts": { + "cache_read_input_token_cost": 1.25e-07, + "cache_read_input_token_cost_above_200k_tokens": 2.5e-07, + "input_cost_per_audio_token": 7e-07, + "input_cost_per_token": 1.25e-06, + "input_cost_per_token_above_200k_tokens": 2.5e-06, + "litellm_provider": "vertex_ai-language-models", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 65535, + "max_pdf_size_mb": 30, + "max_tokens": 65535, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_token": 1e-05, + "output_cost_per_token_above_200k_tokens": 1.5e-05, + "source": "https://ai.google.dev/gemini-api/docs/pricing#gemini-2.5-pro-preview", + "supported_modalities": [ + "text" + ], + "supported_output_modalities": [ + "audio" + ], + 
"supports_audio_output": false, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_web_search": true, + "search_context_cost_per_query": { + "search_context_size_low": 0.035, + "search_context_size_medium": 0.035, + "search_context_size_high": 0.035 + } + }, + "gemini-robotics-er-1.5-preview": { + "cache_read_input_token_cost": 0, + "input_cost_per_token": 3e-07, + "input_cost_per_audio_token": 1e-06, + "litellm_provider": "vertex_ai-language-models", + "max_input_tokens": 1048576, + "max_output_tokens": 65535, + "max_tokens": 65535, + "mode": "chat", + "output_cost_per_token": 2.5e-06, + "output_cost_per_reasoning_token": 2.5e-06, + "source": "https://ai.google.dev/gemini-api/docs/models#gemini-robotics-er-1-5-preview", + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions" + ], + "supported_modalities": [ + "text", + "image", + "video", + "audio" + ], + "supported_output_modalities": [ + "text" + ], + "supports_audio_output": false, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": false, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_url_context": true, + "supports_vision": true + }, + "gemini/gemini-robotics-er-1.5-preview": { + "cache_read_input_token_cost": 0, + "input_cost_per_token": 3e-07, + "input_cost_per_audio_token": 1e-06, + "litellm_provider": "gemini", + "max_input_tokens": 1048576, + "max_output_tokens": 65535, + "max_tokens": 65535, + "mode": "chat", + "output_cost_per_token": 2.5e-06, + "output_cost_per_reasoning_token": 2.5e-06, + "source": "https://ai.google.dev/gemini-api/docs/models#gemini-robotics-er-1-5-preview", + "supported_endpoints": [ + 
"/v1/chat/completions", + "/v1/completions" + ], + "supported_modalities": [ + "text", + "image", + "video", + "audio" + ], + "supported_output_modalities": [ + "text" + ], + "supports_audio_output": false, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": false, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_url_context": true, + "supports_vision": true, + "supports_web_search": true, + "tpm": 250000, + "rpm": 10, + "search_context_cost_per_query": { + "search_context_size_low": 0.035, + "search_context_size_medium": 0.035, + "search_context_size_high": 0.035 + } + }, + "gemini-2.5-computer-use-preview-10-2025": { + "input_cost_per_token": 1.25e-06, + "input_cost_per_token_above_200k_tokens": 2.5e-06, + "litellm_provider": "vertex_ai-language-models", + "max_images_per_prompt": 3000, + "max_input_tokens": 128000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 1e-05, + "output_cost_per_token_above_200k_tokens": 1.5e-05, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/computer-use", + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_computer_use": true, + "supports_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "gemini-embedding-001": { + "input_cost_per_token": 1.5e-07, + "litellm_provider": "vertex_ai-embedding-models", + "max_input_tokens": 2048, + "max_tokens": 2048, + "mode": "embedding", + "output_cost_per_token": 0, + "output_vector_size": 3072, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models" + }, + "gemini-embedding-2-preview": { + "input_cost_per_audio_per_second": 0.00016, + "input_cost_per_image": 0.00012, + "input_cost_per_token": 2e-07, + 
"input_cost_per_video_per_second": 0.00079, + "litellm_provider": "vertex_ai-embedding-models", + "max_input_tokens": 8192, + "max_tokens": 8192, + "mode": "embedding", + "output_cost_per_token": 0, + "output_vector_size": 3072, + "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing", + "uses_embed_content": true + }, + "gemini-embedding-2": { + "input_cost_per_audio_per_second": 0.00016, + "input_cost_per_image": 0.00012, + "input_cost_per_token": 2e-07, + "input_cost_per_video_per_second": 0.00079, + "litellm_provider": "vertex_ai-embedding-models", + "max_input_tokens": 8192, + "max_tokens": 8192, + "mode": "embedding", + "output_cost_per_token": 0, + "output_vector_size": 3072, + "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing", + "supports_multimodal": true, + "uses_embed_content": true + }, + "vertex_ai/gemini-embedding-2-preview": { + "input_cost_per_token": 1.5e-07, + "litellm_provider": "vertex_ai", + "max_input_tokens": 8192, + "max_tokens": 8192, + "mode": "embedding", + "output_cost_per_token": 0, + "output_vector_size": 3072, + "source": "https://ai.google.dev/gemini-api/docs/embeddings#multimodal", + "supports_multimodal": true, + "uses_embed_content": true + }, + "vertex_ai/gemini-embedding-2": { + "input_cost_per_audio_per_second": 0.00016, + "input_cost_per_image": 0.00012, + "input_cost_per_token": 2e-07, + "input_cost_per_video_per_second": 0.00079, + "litellm_provider": "vertex_ai", + "max_input_tokens": 8192, + "max_tokens": 8192, + "mode": "embedding", + "output_cost_per_token": 0, + "output_vector_size": 3072, + "source": "https://ai.google.dev/gemini-api/docs/embeddings#multimodal", + "supports_multimodal": true, + "uses_embed_content": true + }, + "gemini-flash-experimental": { + "input_cost_per_character": 0, + "input_cost_per_token": 0, + "litellm_provider": "vertex_ai-language-models", + "max_input_tokens": 1000000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "embedding", + 
"output_cost_per_token": 0, + "output_vector_size": 3072, + "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing", + "uses_embed_content": true + }, + "gemini/gemini-embedding-001": { + "input_cost_per_token": 1.5e-07, + "litellm_provider": "gemini", + "max_input_tokens": 2048, + "max_tokens": 2048, + "mode": "embedding", + "output_cost_per_token": 0, + "output_vector_size": 3072, + "rpm": 10000, + "source": "https://ai.google.dev/gemini-api/docs/embeddings#model-versions", + "tpm": 10000000 + }, + "gemini/gemini-embedding-2-preview": { + "input_cost_per_audio_per_second": 0.00016, + "input_cost_per_image": 0.00012, + "input_cost_per_token": 2e-07, + "input_cost_per_video_per_second": 0.00079, + "litellm_provider": "gemini", + "max_input_tokens": 8192, + "max_tokens": 8192, + "mode": "embedding", + "output_cost_per_token": 0, + "output_vector_size": 3072, + "rpm": 10000, + "source": "https://ai.google.dev/gemini-api/docs/pricing", + "supports_multimodal": true, + "tpm": 10000000 + }, + "gemini/gemini-embedding-2": { + "input_cost_per_audio_per_second": 0.00016, + "input_cost_per_image": 0.00012, + "input_cost_per_token": 2e-07, + "input_cost_per_video_per_second": 0.00079, + "litellm_provider": "gemini", + "max_input_tokens": 8192, + "max_tokens": 8192, + "mode": "embedding", + "output_cost_per_token": 0, + "output_vector_size": 3072, + "rpm": 10000, + "source": "https://ai.google.dev/gemini-api/docs/pricing", + "supports_multimodal": true, + "tpm": 10000000 + }, + "gemini/gemini-1.5-flash": { + "deprecation_date": "2025-09-29", + "input_cost_per_token": 7.5e-08, + "input_cost_per_token_above_128k_tokens": 1.5e-07, + "litellm_provider": "gemini", + "max_input_tokens": 8192, + "max_tokens": 8192, + "mode": "embedding", + "output_cost_per_token": 0, + "output_vector_size": 3072, + "rpm": 10000, + "source": "https://ai.google.dev/gemini-api/docs/embeddings#multimodal", + "supports_multimodal": true, + "tpm": 10000000 + }, + "gemini/gemini-2.0-flash": { 
+ "cache_read_input_token_cost": 2.5e-08, + "deprecation_date": "2026-06-01", + "input_cost_per_audio_token": 7e-07, + "input_cost_per_token": 1e-07, + "litellm_provider": "gemini", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 8192, + "max_pdf_size_mb": 30, + "max_tokens": 8192, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_token": 4e-07, + "rpm": 10000, + "source": "https://ai.google.dev/pricing#2_0flash", + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text", + "image" + ], + "supports_audio_input": true, + "supports_audio_output": true, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_url_context": true, + "supports_vision": true, + "supports_web_search": true, + "tpm": 10000000, + "search_context_cost_per_query": { + "search_context_size_low": 0.035, + "search_context_size_medium": 0.035, + "search_context_size_high": 0.035 + } + }, + "gemini/gemini-2.0-flash-001": { + "cache_read_input_token_cost": 2.5e-08, + "deprecation_date": "2026-06-01", + "input_cost_per_audio_token": 7e-07, + "input_cost_per_token": 1e-07, + "litellm_provider": "gemini", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 8192, + "max_pdf_size_mb": 30, + "max_tokens": 8192, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_token": 4e-07, + "rpm": 10000, + "source": "https://ai.google.dev/pricing#2_0flash", + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text", + "image" + ], + "supports_audio_output": false, + 
"supports_function_calling": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_web_search": true, + "tpm": 10000000, + "search_context_cost_per_query": { + "search_context_size_low": 0.035, + "search_context_size_medium": 0.035, + "search_context_size_high": 0.035 + } + }, + "gemini/gemini-2.0-flash-lite": { + "cache_read_input_token_cost": 1.875e-08, + "deprecation_date": "2026-06-01", + "input_cost_per_audio_token": 7.5e-08, + "input_cost_per_token": 7.5e-08, + "litellm_provider": "gemini", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 8192, + "max_pdf_size_mb": 50, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_token": 3e-07, + "rpm": 4000, + "source": "https://ai.google.dev/gemini-api/docs/pricing#gemini-2.0-flash-lite", + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], + "supports_audio_output": true, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_web_search": true, + "tpm": 4000000, + "search_context_cost_per_query": { + "search_context_size_low": 0.035, + "search_context_size_medium": 0.035, + "search_context_size_high": 0.035 + } + }, + "gemini/gemini-2.5-flash": { + "cache_read_input_token_cost": 3e-08, + "input_cost_per_audio_token": 1e-06, + "input_cost_per_token": 3e-07, + "litellm_provider": "gemini", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 65535, + "max_pdf_size_mb": 30, + "max_tokens": 65535, + "max_video_length": 1, + 
"max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_reasoning_token": 2.5e-06, + "output_cost_per_token": 2.5e-06, + "rpm": 100000, + "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview", + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], + "supports_audio_output": false, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_url_context": true, + "supports_vision": true, + "supports_web_search": true, + "tpm": 8000000, + "search_context_cost_per_query": { + "search_context_size_low": 0.035, + "search_context_size_medium": 0.035, + "search_context_size_high": 0.035 + }, + "supports_service_tier": true + }, + "gemini/gemini-2.5-flash-image": { + "cache_read_input_token_cost": 3e-08, + "input_cost_per_audio_token": 1e-06, + "input_cost_per_token": 3e-07, + "litellm_provider": "gemini", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "supports_reasoning": false, + "max_images_per_prompt": 3000, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "max_tokens": 32768, + "max_pdf_size_mb": 30, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "image_generation", + "output_cost_per_image": 0.039, + "output_cost_per_image_token": 3e-05, + "output_cost_per_reasoning_token": 2.5e-06, + "output_cost_per_token": 2.5e-06, + "rpm": 100000, + "source": "https://ai.google.dev/gemini-api/docs/pricing#gemini-2.5-flash-image", + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + 
"supported_output_modalities": [ + "text", + "image" + ], + "supports_audio_output": false, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_url_context": true, + "supports_vision": true, + "supports_web_search": true, + "tpm": 8000000, + "search_context_cost_per_query": { + "search_context_size_low": 0.035, + "search_context_size_medium": 0.035, + "search_context_size_high": 0.035 + }, + "supports_service_tier": true + }, + "gemini/gemini-3-pro-image-preview": { + "input_cost_per_image": 0.0011, + "input_cost_per_token": 2e-06, + "input_cost_per_token_batches": 1e-06, + "litellm_provider": "gemini", + "max_input_tokens": 65536, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "image_generation", + "output_cost_per_image": 0.134, + "output_cost_per_image_token": 0.00012, + "output_cost_per_token": 1.2e-05, + "rpm": 1000, + "tpm": 4000000, + "output_cost_per_token_batches": 6e-06, + "source": "https://ai.google.dev/gemini-api/docs/pricing", + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text", + "image" + ], + "supports_function_calling": false, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_vision": true, + "supports_web_search": true, + "search_context_cost_per_query": { + "search_context_size_low": 0.014, + "search_context_size_medium": 0.014, + "search_context_size_high": 0.014 + }, + "web_search_billing_unit": "per_query", + "supports_service_tier": true + }, + "gemini/gemini-3.1-flash-image-preview": { + "input_cost_per_token": 2.5e-07, + "input_cost_per_token_batches": 1.25e-07, + "litellm_provider": "gemini", + 
"max_input_tokens": 65536, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "image_generation", + "output_cost_per_image": 0.045, + "output_cost_per_image_token": 6e-05, + "output_cost_per_image_token_batches": 3e-05, + "output_cost_per_token": 1.5e-06, + "output_cost_per_token_batches": 7.5e-07, + "rpm": 1000, + "tpm": 4000000, + "source": "https://ai.google.dev/gemini-api/docs/pricing#gemini-3.1-flash-image-preview", + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text", + "image" + ], + "supports_function_calling": false, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_vision": true, + "supports_web_search": true, + "search_context_cost_per_query": { + "search_context_size_low": 0.014, + "search_context_size_medium": 0.014, + "search_context_size_high": 0.014 + }, + "web_search_billing_unit": "per_query" + }, + "gemini/deep-research-pro-preview-12-2025": { + "input_cost_per_image": 0.0011, + "input_cost_per_token": 2e-06, + "input_cost_per_token_batches": 1e-06, + "litellm_provider": "gemini", + "max_input_tokens": 65536, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "image_generation", + "output_cost_per_image": 0.134, + "output_cost_per_image_token": 0.00012, + "output_cost_per_token": 1.2e-05, + "rpm": 1000, + "tpm": 4000000, + "output_cost_per_token_batches": 6e-06, + "source": "https://ai.google.dev/gemini-api/docs/pricing", + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text", + "image" + ], + "supports_function_calling": false, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_vision": true, + "supports_web_search": 
true, + "search_context_cost_per_query": { + "search_context_size_low": 0.035, + "search_context_size_medium": 0.035, + "search_context_size_high": 0.035 + } + }, + "gemini/gemini-2.5-flash-lite": { + "cache_read_input_token_cost": 1e-08, + "input_cost_per_audio_token": 3e-07, + "input_cost_per_token": 1e-07, + "litellm_provider": "gemini", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 65535, + "max_pdf_size_mb": 30, + "max_tokens": 65535, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_reasoning_token": 4e-07, + "output_cost_per_token": 4e-07, + "rpm": 15, + "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-lite", + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], + "supports_audio_output": false, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_url_context": true, + "supports_vision": true, + "supports_web_search": true, + "tpm": 250000, + "search_context_cost_per_query": { + "search_context_size_low": 0.035, + "search_context_size_medium": 0.035, + "search_context_size_high": 0.035 + }, + "supports_service_tier": true + }, + "gemini/gemini-2.5-flash-lite-preview-09-2025": { + "cache_read_input_token_cost": 1e-08, + "input_cost_per_audio_token": 3e-07, + "input_cost_per_token": 1e-07, + "litellm_provider": "gemini", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 65535, + "max_pdf_size_mb": 
30, + "max_tokens": 65535, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_reasoning_token": 4e-07, + "output_cost_per_token": 4e-07, + "rpm": 15, + "source": "https://developers.googleblog.com/en/continuing-to-bring-you-our-latest-models-with-an-improved-gemini-2-5-flash-and-flash-lite-release/", + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], + "supports_audio_output": false, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_url_context": true, + "supports_vision": true, + "supports_web_search": true, + "tpm": 250000, + "search_context_cost_per_query": { + "search_context_size_low": 0.035, + "search_context_size_medium": 0.035, + "search_context_size_high": 0.035 + } + }, + "gemini/gemini-2.5-flash-preview-09-2025": { + "cache_read_input_token_cost": 7.5e-08, + "input_cost_per_audio_token": 1e-06, + "input_cost_per_token": 3e-07, + "litellm_provider": "gemini", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 65535, + "max_pdf_size_mb": 30, + "max_tokens": 65535, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_reasoning_token": 2.5e-06, + "output_cost_per_token": 2.5e-06, + "rpm": 15, + "source": "https://developers.googleblog.com/en/continuing-to-bring-you-our-latest-models-with-an-improved-gemini-2-5-flash-and-flash-lite-release/", + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": [ + "text", + 
"image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], + "supports_audio_output": false, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_url_context": true, + "supports_vision": true, + "supports_web_search": true, + "tpm": 250000, + "search_context_cost_per_query": { + "search_context_size_low": 0.035, + "search_context_size_medium": 0.035, + "search_context_size_high": 0.035 + } + }, + "gemini/gemini-flash-latest": { + "cache_read_input_token_cost": 7.5e-08, + "input_cost_per_audio_token": 1e-06, + "input_cost_per_token": 3e-07, + "litellm_provider": "gemini", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 65535, + "max_pdf_size_mb": 30, + "max_tokens": 65535, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_reasoning_token": 2.5e-06, + "output_cost_per_token": 2.5e-06, + "rpm": 15, + "source": "https://developers.googleblog.com/en/continuing-to-bring-you-our-latest-models-with-an-improved-gemini-2-5-flash-and-flash-lite-release/", + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], + "supports_audio_output": false, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_url_context": true, + "supports_vision": true, + "supports_web_search": true, + 
"tpm": 250000, + "search_context_cost_per_query": { + "search_context_size_low": 0.035, + "search_context_size_medium": 0.035, + "search_context_size_high": 0.035 + } + }, + "gemini/gemini-flash-lite-latest": { + "cache_read_input_token_cost": 2.5e-08, + "input_cost_per_audio_token": 3e-07, + "input_cost_per_token": 1e-07, + "litellm_provider": "gemini", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 65535, + "max_pdf_size_mb": 30, + "max_tokens": 65535, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_reasoning_token": 4e-07, + "output_cost_per_token": 4e-07, + "rpm": 15, + "source": "https://developers.googleblog.com/en/continuing-to-bring-you-our-latest-models-with-an-improved-gemini-2-5-flash-and-flash-lite-release/", + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], + "supports_audio_output": false, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_url_context": true, + "supports_vision": true, + "supports_web_search": true, + "tpm": 250000, + "search_context_cost_per_query": { + "search_context_size_low": 0.035, + "search_context_size_medium": 0.035, + "search_context_size_high": 0.035 + } + }, + "gemini/gemini-2.5-flash-lite-preview-06-17": { + "deprecation_date": "2025-11-18", + "cache_read_input_token_cost": 2.5e-08, + "input_cost_per_audio_token": 5e-07, + "input_cost_per_token": 1e-07, + "litellm_provider": "gemini", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 
3000, + "max_input_tokens": 1048576, + "max_output_tokens": 65535, + "max_pdf_size_mb": 30, + "max_tokens": 65535, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_reasoning_token": 4e-07, + "output_cost_per_token": 4e-07, + "rpm": 15, + "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-lite", + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], + "supports_audio_output": false, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_url_context": true, + "supports_vision": true, + "supports_web_search": true, + "tpm": 250000, + "search_context_cost_per_query": { + "search_context_size_low": 0.035, + "search_context_size_medium": 0.035, + "search_context_size_high": 0.035 + } + }, + "gemini/gemini-2.5-flash-preview-tts": { + "input_cost_per_token": 3e-07, + "litellm_provider": "gemini", + "mode": "audio_speech", + "output_cost_per_token": 2.5e-06, + "source": "https://ai.google.dev/pricing", + "supported_endpoints": [ + "/v1/audio/speech" + ], + "tpm": 4000000, + "rpm": 10 + }, + "gemini/gemini-2.5-pro": { + "cache_read_input_token_cost": 1.25e-07, + "cache_read_input_token_cost_above_200k_tokens": 2.5e-07, + "input_cost_per_token": 1.25e-06, + "input_cost_per_token_above_200k_tokens": 2.5e-06, + "input_cost_per_token_priority": 1.25e-06, + "input_cost_per_token_above_200k_tokens_priority": 2.5e-06, + "litellm_provider": "gemini", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 65535, + "max_pdf_size_mb": 
30, + "max_tokens": 65535, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_token": 1e-05, + "output_cost_per_token_above_200k_tokens": 1.5e-05, + "output_cost_per_token_priority": 1e-05, + "output_cost_per_token_above_200k_tokens_priority": 1.5e-05, + "rpm": 2000, + "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing", + "supports_service_tier": true, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], + "supports_audio_input": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_video_input": true, + "supports_vision": true, + "supports_web_search": true, + "tpm": 800000, + "search_context_cost_per_query": { + "search_context_size_low": 0.035, + "search_context_size_medium": 0.035, + "search_context_size_high": 0.035 + } + }, + "gemini/gemini-2.5-computer-use-preview-10-2025": { + "input_cost_per_token": 1.25e-06, + "input_cost_per_token_above_200k_tokens": 2.5e-06, + "litellm_provider": "gemini", + "max_images_per_prompt": 3000, + "max_input_tokens": 128000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 1e-05, + "output_cost_per_token_above_200k_tokens": 1.5e-05, + "rpm": 2000, + "source": "https://ai.google.dev/gemini-api/docs/computer-use", + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_computer_use": true, + "supports_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "tpm": 800000 + }, + 
"gemini/gemini-3-pro-preview": { + "deprecation_date": "2026-03-09", + "cache_read_input_token_cost": 2e-07, + "cache_read_input_token_cost_above_200k_tokens": 4e-07, + "input_cost_per_token": 2e-06, + "input_cost_per_token_above_200k_tokens": 4e-06, + "input_cost_per_token_batches": 1e-06, + "litellm_provider": "gemini", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 65535, + "max_pdf_size_mb": 30, + "max_tokens": 65535, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_token": 1.2e-05, + "output_cost_per_token_above_200k_tokens": 1.8e-05, + "output_cost_per_token_batches": 6e-06, + "rpm": 2000, + "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing", + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], + "supports_audio_input": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_video_input": true, + "supports_vision": true, + "supports_web_search": true, + "tpm": 800000, + "input_cost_per_token_priority": 3.6e-06, + "input_cost_per_token_above_200k_tokens_priority": 7.2e-06, + "output_cost_per_token_priority": 2.16e-05, + "output_cost_per_token_above_200k_tokens_priority": 3.24e-05, + "cache_read_input_token_cost_priority": 3.6e-07, + "cache_read_input_token_cost_above_200k_tokens_priority": 7.2e-07, + "supports_service_tier": true, + "search_context_cost_per_query": { + "search_context_size_low": 0.014, + "search_context_size_medium": 0.014, + "search_context_size_high": 0.014 + }, + "web_search_billing_unit": "per_query" + }, + 
"gemini/gemini-3.1-flash-lite-preview": { + "cache_read_input_token_cost": 2.5e-08, + "cache_read_input_token_cost_per_audio_token": 5e-08, + "input_cost_per_audio_token": 5e-07, + "input_cost_per_token": 2.5e-07, + "litellm_provider": "gemini", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 65536, + "max_pdf_size_mb": 30, + "max_tokens": 65536, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_reasoning_token": 1.5e-06, + "output_cost_per_token": 1.5e-06, + "rpm": 15, + "source": "https://ai.google.dev/gemini-api/docs/models", + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], + "supports_audio_input": true, + "supports_audio_output": false, + "supports_code_execution": true, + "supports_file_search": true, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_url_context": true, + "supports_video_input": true, + "supports_vision": true, + "supports_web_search": true, + "supports_native_streaming": true, + "tpm": 250000, + "search_context_cost_per_query": { + "search_context_size_low": 0.014, + "search_context_size_medium": 0.014, + "search_context_size_high": 0.014 + }, + "web_search_billing_unit": "per_query", + "supports_service_tier": true + }, + "gemini/gemini-3-flash-preview": { + "cache_read_input_token_cost": 5e-08, + "input_cost_per_audio_token": 1e-06, + "input_cost_per_token": 5e-07, + "litellm_provider": "gemini", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + 
"max_input_tokens": 1048576, + "max_output_tokens": 65535, + "max_pdf_size_mb": 30, + "max_tokens": 65535, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_reasoning_token": 3e-06, + "output_cost_per_token": 3e-06, + "rpm": 2000, + "source": "https://ai.google.dev/pricing/gemini-3", + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], + "supports_audio_output": false, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_url_context": true, + "supports_vision": true, + "supports_web_search": true, + "supports_native_streaming": true, + "tpm": 800000, + "input_cost_per_token_priority": 9e-07, + "input_cost_per_audio_token_priority": 1.8e-06, + "output_cost_per_token_priority": 5.4e-06, + "cache_read_input_token_cost_priority": 9e-08, + "supports_service_tier": true, + "search_context_cost_per_query": { + "search_context_size_low": 0.014, + "search_context_size_medium": 0.014, + "search_context_size_high": 0.014 + }, + "web_search_billing_unit": "per_query" + }, + "gemini/gemini-3.1-pro-preview": { + "cache_read_input_token_cost": 2e-07, + "cache_read_input_token_cost_above_200k_tokens": 4e-07, + "input_cost_per_token": 2e-06, + "input_cost_per_token_above_200k_tokens": 4e-06, + "input_cost_per_token_batches": 1e-06, + "litellm_provider": "gemini", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 65536, + "max_pdf_size_mb": 30, + "max_tokens": 65536, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + 
"output_cost_per_token": 1.2e-05, + "output_cost_per_token_above_200k_tokens": 1.8e-05, + "output_cost_per_token_batches": 6e-06, + "rpm": 2000, + "source": "https://ai.google.dev/gemini-api/docs/models#gemini-3.1-pro-preview", + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], + "supports_audio_input": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_video_input": true, + "supports_vision": true, + "supports_web_search": true, + "supports_url_context": true, + "supports_native_streaming": true, + "tpm": 800000, + "input_cost_per_token_priority": 3.6e-06, + "input_cost_per_token_above_200k_tokens_priority": 7.2e-06, + "output_cost_per_token_priority": 2.16e-05, + "output_cost_per_token_above_200k_tokens_priority": 3.24e-05, + "cache_read_input_token_cost_priority": 3.6e-07, + "cache_read_input_token_cost_above_200k_tokens_priority": 7.2e-07, + "supports_service_tier": true, + "search_context_cost_per_query": { + "search_context_size_low": 0.014, + "search_context_size_medium": 0.014, + "search_context_size_high": 0.014 + }, + "web_search_billing_unit": "per_query" + }, + "gemini/gemini-3.1-pro-preview-customtools": { + "cache_read_input_token_cost": 2e-07, + "cache_read_input_token_cost_above_200k_tokens": 4e-07, + "input_cost_per_token": 2e-06, + "input_cost_per_token_above_200k_tokens": 4e-06, + "input_cost_per_token_batches": 1e-06, + "litellm_provider": "gemini", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 65536, + "max_pdf_size_mb": 30, + "max_tokens": 65536, + "max_video_length": 1, + 
"max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_token": 1.2e-05, + "output_cost_per_token_above_200k_tokens": 1.8e-05, + "output_cost_per_token_batches": 6e-06, + "rpm": 2000, + "source": "https://ai.google.dev/gemini-api/docs/models#gemini-3.1-pro-preview", + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], + "supports_audio_input": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_video_input": true, + "supports_vision": true, + "supports_web_search": true, + "supports_url_context": true, + "supports_native_streaming": true, + "tpm": 800000, + "input_cost_per_token_priority": 3.6e-06, + "input_cost_per_token_above_200k_tokens_priority": 7.2e-06, + "output_cost_per_token_priority": 2.16e-05, + "output_cost_per_token_above_200k_tokens_priority": 3.24e-05, + "cache_read_input_token_cost_priority": 3.6e-07, + "cache_read_input_token_cost_above_200k_tokens_priority": 7.2e-07, + "supports_service_tier": true, + "search_context_cost_per_query": { + "search_context_size_low": 0.014, + "search_context_size_medium": 0.014, + "search_context_size_high": 0.014 + }, + "web_search_billing_unit": "per_query" + }, + "gemini-3-flash-preview": { + "cache_read_input_token_cost": 5e-08, + "input_cost_per_audio_token": 1e-06, + "input_cost_per_token": 5e-07, + "litellm_provider": "vertex_ai-language-models", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 65535, + "max_pdf_size_mb": 30, + "max_tokens": 65535, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + 
"output_cost_per_reasoning_token": 3e-06, + "output_cost_per_token": 3e-06, + "source": "https://ai.google.dev/pricing/gemini-3", + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], + "supports_audio_output": false, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_url_context": true, + "supports_vision": true, + "supports_web_search": true, + "supports_native_streaming": true, + "input_cost_per_token_priority": 9e-07, + "input_cost_per_audio_token_priority": 1.8e-06, + "output_cost_per_token_priority": 5.4e-06, + "cache_read_input_token_cost_priority": 9e-08, + "supports_service_tier": true, + "search_context_cost_per_query": { + "search_context_size_low": 0.014, + "search_context_size_medium": 0.014, + "search_context_size_high": 0.014 + }, + "web_search_billing_unit": "per_query" + }, + "gemini/gemini-2.5-pro-preview-tts": { + "cache_read_input_token_cost": 1.25e-07, + "cache_read_input_token_cost_above_200k_tokens": 2.5e-07, + "input_cost_per_audio_token": 7e-07, + "input_cost_per_token": 1.25e-06, + "input_cost_per_token_above_200k_tokens": 2.5e-06, + "litellm_provider": "gemini", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 65535, + "max_pdf_size_mb": 30, + "max_tokens": 65535, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_token": 1e-05, + "output_cost_per_token_above_200k_tokens": 1.5e-05, + "rpm": 10000, + "source": "https://ai.google.dev/gemini-api/docs/pricing#gemini-2.5-pro-preview", + 
"supported_modalities": [ + "text" + ], + "supported_output_modalities": [ + "audio" + ], + "supports_audio_output": false, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_web_search": true, + "tpm": 10000000, + "search_context_cost_per_query": { + "search_context_size_low": 0.035, + "search_context_size_medium": 0.035, + "search_context_size_high": 0.035 + } + }, + "gemini/gemini-exp-1114": { + "input_cost_per_token": 0, + "input_cost_per_token_above_128k_tokens": 0, + "litellm_provider": "gemini", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 8192, + "max_pdf_size_mb": 30, + "max_tokens": 8192, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "metadata": { + "notes": "Rate limits not documented for gemini-exp-1114. Assuming same as gemini-1.5-pro.", + "supports_tool_choice": true + }, + "mode": "chat", + "output_cost_per_token": 0, + "output_cost_per_token_above_128k_tokens": 0, + "rpm": 1000, + "source": "https://ai.google.dev/pricing", + "supports_function_calling": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "tpm": 4000000 + }, + "gemini/gemini-exp-1206": { + "input_cost_per_token": 0, + "input_cost_per_token_above_128k_tokens": 0, + "litellm_provider": "gemini", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 2097152, + "max_output_tokens": 8192, + "max_pdf_size_mb": 30, + "max_tokens": 8192, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "metadata": { + "notes": "Rate limits not documented for gemini-exp-1206. 
Assuming same as gemini-1.5-pro.", + "supports_tool_choice": true + }, + "mode": "chat", + "output_cost_per_token": 0, + "output_cost_per_token_above_128k_tokens": 0, + "rpm": 1000, + "source": "https://ai.google.dev/pricing", + "supports_function_calling": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "tpm": 4000000 + }, + "gemini/gemini-gemma-2-27b-it": { + "input_cost_per_token": 3.5e-07, + "litellm_provider": "gemini", + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 1.05e-06, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_vision": true, + "tpm": 250000, + "rpm": 10 + }, + "gemini/gemini-gemma-2-9b-it": { + "input_cost_per_token": 3.5e-07, + "litellm_provider": "gemini", + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 1.05e-06, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_vision": true, + "tpm": 250000, + "rpm": 10 + }, + "gemini/gemma-3-27b-it": { + "input_cost_per_audio_per_second": 0, + "input_cost_per_audio_per_second_above_128k_tokens": 0, + "input_cost_per_character": 0, + "input_cost_per_character_above_128k_tokens": 0, + "input_cost_per_image": 0, + "input_cost_per_image_above_128k_tokens": 0, + "input_cost_per_token": 0, + "input_cost_per_token_above_128k_tokens": 0, + "input_cost_per_video_per_second": 0, + "input_cost_per_video_per_second_above_128k_tokens": 0, + "litellm_provider": "gemini", + "max_input_tokens": 131072, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_character": 0, + "output_cost_per_character_above_128k_tokens": 0, + "output_cost_per_token": 
0, + "output_cost_per_token_above_128k_tokens": 0, + "source": "https://aistudio.google.com", + "supports_audio_output": false, + "supports_function_calling": true, + "supports_response_schema": true, + "supports_system_messages": false, + "supports_tool_choice": true, + "supports_vision": true + }, + "gemini/imagen-3.0-fast-generate-001": { + "litellm_provider": "gemini", + "mode": "image_generation", + "output_cost_per_image": 0.02, + "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing" + }, + "gemini/imagen-3.0-generate-001": { + "litellm_provider": "gemini", + "mode": "image_generation", + "output_cost_per_image": 0.04, + "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing" + }, + "gemini/imagen-3.0-generate-002": { + "deprecation_date": "2025-11-10", + "litellm_provider": "gemini", + "mode": "image_generation", + "output_cost_per_image": 0.04, + "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing" + }, + "gemini/imagen-4.0-fast-generate-001": { + "litellm_provider": "gemini", + "mode": "image_generation", + "output_cost_per_image": 0.02, + "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing" + }, + "gemini/imagen-4.0-generate-001": { + "litellm_provider": "gemini", + "mode": "image_generation", + "output_cost_per_image": 0.04, + "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing" + }, + "gemini/imagen-4.0-ultra-generate-001": { + "litellm_provider": "gemini", + "mode": "image_generation", + "output_cost_per_image": 0.06, + "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing" + }, + "gemini/learnlm-1.5-pro-experimental": { + "input_cost_per_audio_per_second": 0, + "input_cost_per_audio_per_second_above_128k_tokens": 0, + "input_cost_per_character": 0, + "input_cost_per_character_above_128k_tokens": 0, + "input_cost_per_image": 0, + "input_cost_per_image_above_128k_tokens": 0, + "input_cost_per_token": 0, + "input_cost_per_token_above_128k_tokens": 0, + 
"input_cost_per_video_per_second": 0, + "input_cost_per_video_per_second_above_128k_tokens": 0, + "litellm_provider": "gemini", + "max_input_tokens": 32767, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_character": 0, + "output_cost_per_character_above_128k_tokens": 0, + "output_cost_per_token": 0, + "output_cost_per_token_above_128k_tokens": 0, + "source": "https://aistudio.google.com", + "supports_audio_output": false, + "supports_function_calling": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "gemini/lyria-3-clip-preview": { + "input_cost_per_token": 0, + "litellm_provider": "gemini", + "max_input_tokens": 131072, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_image": 0.04, + "output_cost_per_token": 0, + "source": "https://ai.google.dev/gemini-api/docs/pricing", + "supported_modalities": [ + "text" + ], + "supported_output_modalities": [ + "audio" + ], + "supports_audio_input": false, + "supports_audio_output": true, + "supports_function_calling": false, + "supports_prompt_caching": false, + "supports_response_schema": false, + "supports_system_messages": false, + "supports_vision": false, + "supports_web_search": false + }, + "gemini/lyria-3-pro-preview": { + "input_cost_per_token": 0, + "litellm_provider": "gemini", + "max_input_tokens": 131072, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 0, + "source": "https://ai.google.dev/gemini-api/docs/pricing", + "supported_modalities": [ + "text" + ], + "supported_output_modalities": [ + "audio" + ], + "supports_audio_input": false, + "supports_audio_output": true, + "supports_function_calling": false, + "supports_prompt_caching": false, + "supports_response_schema": false, + "supports_system_messages": false, + "supports_vision": false, + "supports_web_search": false + }, + 
"gemini/veo-2.0-generate-001": { + "litellm_provider": "gemini", + "max_input_tokens": 1024, + "max_tokens": 1024, + "mode": "video_generation", + "output_cost_per_second": 0.35, + "source": "https://ai.google.dev/gemini-api/docs/video", + "supported_modalities": [ + "text" + ], + "supported_output_modalities": [ + "video" + ] + }, + "gemini/veo-3.1-fast-generate-preview": { + "litellm_provider": "gemini", + "max_input_tokens": 1024, + "max_tokens": 1024, + "mode": "video_generation", + "output_cost_per_second": 0.15, + "source": "https://ai.google.dev/gemini-api/docs/video", + "supported_modalities": [ + "text" + ], + "supported_output_modalities": [ + "video" + ] + }, + "gemini/veo-3.1-generate-preview": { + "litellm_provider": "gemini", + "max_input_tokens": 1024, + "max_tokens": 1024, + "mode": "video_generation", + "output_cost_per_second": 0.4, + "source": "https://ai.google.dev/gemini-api/docs/video", + "supported_modalities": [ + "text" + ], + "supported_output_modalities": [ + "video" + ] + }, + "gemini/veo-3.1-lite-generate-preview": { + "litellm_provider": "gemini", + "max_input_tokens": 1024, + "max_tokens": 1024, + "mode": "video_generation", + "output_cost_per_second": 0.05, + "output_cost_per_second_1080p": 0.08, + "source": "https://ai.google.dev/gemini-api/docs/video", + "supported_modalities": [ + "text" + ], + "supported_output_modalities": [ + "video" + ] + }, + "gemini/veo-3.1-fast-generate-001": { + "litellm_provider": "gemini", + "max_input_tokens": 1024, + "max_tokens": 1024, + "mode": "video_generation", + "output_cost_per_second": 0.15, + "source": "https://ai.google.dev/gemini-api/docs/video", + "supported_modalities": [ + "text" + ], + "supported_output_modalities": [ + "video" + ] + }, + "gemini/veo-3.1-generate-001": { + "litellm_provider": "gemini", + "max_input_tokens": 1024, + "max_tokens": 1024, + "mode": "video_generation", + "output_cost_per_second": 0.4, + "source": "https://ai.google.dev/gemini-api/docs/video", + 
"supported_modalities": [ + "text" + ], + "supported_output_modalities": [ + "video" + ] + }, + "github_copilot/claude-haiku-4.5": { + "litellm_provider": "github_copilot", + "max_input_tokens": 128000, + "max_output_tokens": 16000, + "max_tokens": 16000, + "mode": "chat", + "supported_endpoints": [ + "/v1/chat/completions" + ], + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": true + }, + "github_copilot/claude-opus-4.5": { + "litellm_provider": "github_copilot", + "max_input_tokens": 128000, + "max_output_tokens": 16000, + "max_tokens": 16000, + "mode": "chat", + "supported_endpoints": [ + "/v1/chat/completions" + ], + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": true, + "supports_minimal_reasoning_effort": true + }, + "github_copilot/claude-opus-4.6-fast": { + "litellm_provider": "github_copilot", + "max_input_tokens": 128000, + "max_output_tokens": 16000, + "max_tokens": 16000, + "mode": "chat", + "supported_endpoints": [ + "/v1/chat/completions" + ], + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": true + }, + "github_copilot/claude-opus-41": { + "litellm_provider": "github_copilot", + "max_input_tokens": 80000, + "max_output_tokens": 16000, + "max_tokens": 16000, + "mode": "chat", + "supported_endpoints": [ + "/v1/chat/completions" + ], + "supports_vision": true + }, + "github_copilot/claude-sonnet-4": { + "litellm_provider": "github_copilot", + "max_input_tokens": 128000, + "max_output_tokens": 16000, + "max_tokens": 16000, + "mode": "chat", + "supported_endpoints": [ + "/v1/chat/completions" + ], + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": true + }, + "github_copilot/claude-sonnet-4.5": { + "litellm_provider": "github_copilot", + "max_input_tokens": 128000, + "max_output_tokens": 16000, + "max_tokens": 16000, + "mode": "chat", + 
"supported_endpoints": [ + "/v1/chat/completions" + ], + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": true + }, + "github_copilot/gemini-2.5-pro": { + "litellm_provider": "github_copilot", + "max_input_tokens": 128000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": true + }, + "github_copilot/gemini-3-pro-preview": { + "litellm_provider": "github_copilot", + "max_input_tokens": 128000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": true + }, + "github_copilot/gpt-3.5-turbo": { + "litellm_provider": "github_copilot", + "max_input_tokens": 16384, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "supports_function_calling": true + }, + "github_copilot/gpt-3.5-turbo-0613": { + "litellm_provider": "github_copilot", + "max_input_tokens": 16384, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "supports_function_calling": true + }, + "github_copilot/gpt-4": { + "litellm_provider": "github_copilot", + "max_input_tokens": 32768, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "supports_function_calling": true + }, + "github_copilot/gpt-4-0613": { + "litellm_provider": "github_copilot", + "max_input_tokens": 32768, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "supports_function_calling": true + }, + "github_copilot/gpt-4-o-preview": { + "litellm_provider": "github_copilot", + "max_input_tokens": 64000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true + }, + "github_copilot/gpt-4.1": { + "litellm_provider": "github_copilot", + "max_input_tokens": 128000, + "max_output_tokens": 
16384, + "max_tokens": 16384, + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_vision": true + }, + "github_copilot/gpt-4.1-2025-04-14": { + "litellm_provider": "github_copilot", + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_vision": true + }, + "github_copilot/gpt-41-copilot": { + "litellm_provider": "github_copilot", + "mode": "completion" + }, + "github_copilot/gpt-4o": { + "litellm_provider": "github_copilot", + "max_input_tokens": 64000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": true + }, + "github_copilot/gpt-4o-2024-05-13": { + "litellm_provider": "github_copilot", + "max_input_tokens": 64000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": true + }, + "github_copilot/gpt-4o-2024-08-06": { + "litellm_provider": "github_copilot", + "max_input_tokens": 64000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true + }, + "github_copilot/gpt-4o-2024-11-20": { + "litellm_provider": "github_copilot", + "max_input_tokens": 64000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": true + }, + "github_copilot/gpt-4o-mini": { + "litellm_provider": "github_copilot", + "max_input_tokens": 64000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "supports_function_calling": true, + 
"supports_parallel_function_calling": true + }, + "github_copilot/gpt-4o-mini-2024-07-18": { + "litellm_provider": "github_copilot", + "max_input_tokens": 64000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true + }, + "github_copilot/gpt-5": { + "litellm_provider": "github_copilot", + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/responses" + ], + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_vision": true + }, + "github_copilot/gpt-5-mini": { + "litellm_provider": "github_copilot", + "max_input_tokens": 128000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_vision": true + }, + "github_copilot/gpt-5.1": { + "litellm_provider": "github_copilot", + "max_input_tokens": 128000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/responses" + ], + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_vision": true + }, + "github_copilot/gpt-5.1-codex-max": { + "litellm_provider": "github_copilot", + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "responses", + "supported_endpoints": [ + "/v1/responses" + ], + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_vision": true + }, + "github_copilot/gpt-5.2": { + "litellm_provider": "github_copilot", + "max_input_tokens": 128000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + 
"supported_endpoints": [ + "/v1/chat/completions", + "/v1/responses" + ], + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_vision": true + }, + "github_copilot/gpt-5.3-codex": { + "litellm_provider": "github_copilot", + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "responses", + "supported_endpoints": [ + "/v1/responses" + ], + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_vision": true + }, + "github_copilot/text-embedding-3-small": { + "litellm_provider": "github_copilot", + "max_input_tokens": 8191, + "max_tokens": 8191, + "mode": "embedding" + }, + "github_copilot/text-embedding-3-small-inference": { + "litellm_provider": "github_copilot", + "max_input_tokens": 8191, + "max_tokens": 8191, + "mode": "embedding" + }, + "github_copilot/text-embedding-ada-002": { + "litellm_provider": "github_copilot", + "max_input_tokens": 8191, + "max_tokens": 8191, + "mode": "embedding" + }, + "chatgpt/gpt-5.4": { + "litellm_provider": "chatgpt", + "max_input_tokens": 1050000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "responses", + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/responses" + ], + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_vision": true + }, + "chatgpt/gpt-5.4-pro": { + "litellm_provider": "chatgpt", + "max_input_tokens": 1050000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "responses", + "supported_endpoints": [ + "/v1/responses" + ], + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_vision": true + }, + "chatgpt/gpt-5.3-codex": { + "litellm_provider": "chatgpt", + "max_input_tokens": 128000, + "max_output_tokens": 128000, + 
"max_tokens": 128000, + "mode": "responses", + "supported_endpoints": [ + "/v1/responses" + ], + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_vision": true + }, + "chatgpt/gpt-5.3-codex-spark": { + "litellm_provider": "chatgpt", + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "responses", + "supported_endpoints": [ + "/v1/responses" + ], + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_vision": true + }, + "chatgpt/gpt-5.3-instant": { + "litellm_provider": "chatgpt", + "max_input_tokens": 128000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "responses", + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/responses" + ], + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_vision": true + }, + "chatgpt/gpt-5.3-chat-latest": { + "litellm_provider": "chatgpt", + "max_input_tokens": 128000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "responses", + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/responses" + ], + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_vision": true + }, + "chatgpt/gpt-5.2-codex": { + "litellm_provider": "chatgpt", + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "responses", + "supported_endpoints": [ + "/v1/responses" + ], + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_vision": true + }, + "chatgpt/gpt-5.2": { + "litellm_provider": "chatgpt", + "max_input_tokens": 128000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "responses", + "supported_endpoints": [ + 
"/v1/chat/completions", + "/v1/responses" + ], + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_vision": true + }, + "chatgpt/gpt-5.1-codex-max": { + "litellm_provider": "chatgpt", + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "responses", + "supported_endpoints": [ + "/v1/responses" + ], + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_vision": true + }, + "chatgpt/gpt-5.1-codex-mini": { + "litellm_provider": "chatgpt", + "max_input_tokens": 128000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "responses", + "supported_endpoints": [ + "/v1/responses" + ], + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_vision": true + }, + "gigachat/GigaChat-2-Lite": { + "input_cost_per_token": 0.0, + "litellm_provider": "gigachat", + "max_input_tokens": 128000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 0.0, + "supports_function_calling": true, + "supports_system_messages": true + }, + "gigachat/GigaChat-2-Max": { + "input_cost_per_token": 0.0, + "litellm_provider": "gigachat", + "max_input_tokens": 128000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 0.0, + "supports_function_calling": true, + "supports_system_messages": true, + "supports_vision": true + }, + "gigachat/GigaChat-2-Pro": { + "input_cost_per_token": 0.0, + "litellm_provider": "gigachat", + "max_input_tokens": 128000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 0.0, + "supports_function_calling": true, + "supports_system_messages": true, + "supports_vision": true + }, + "gigachat/Embeddings": { + "input_cost_per_token": 0.0, + "litellm_provider": 
"gigachat", + "max_input_tokens": 512, + "max_tokens": 512, + "mode": "embedding", + "output_cost_per_token": 0.0, + "output_vector_size": 1024 + }, + "gigachat/Embeddings-2": { + "input_cost_per_token": 0.0, + "litellm_provider": "gigachat", + "max_input_tokens": 512, + "max_tokens": 512, + "mode": "embedding", + "output_cost_per_token": 0.0, + "output_vector_size": 1024 + }, + "gigachat/EmbeddingsGigaR": { + "input_cost_per_token": 0.0, + "litellm_provider": "gigachat", + "max_input_tokens": 4096, + "max_tokens": 4096, + "mode": "embedding", + "output_cost_per_token": 0.0, + "output_vector_size": 2560 + }, + "gmi/anthropic/claude-opus-4.5": { + "input_cost_per_token": 5e-06, + "litellm_provider": "gmi", + "max_input_tokens": 409600, + "max_output_tokens": 32000, + "max_tokens": 32000, + "mode": "chat", + "output_cost_per_token": 2.5e-05, + "supports_function_calling": true, + "supports_vision": true, + "supports_minimal_reasoning_effort": true + }, + "gmi/anthropic/claude-sonnet-4.5": { + "input_cost_per_token": 3e-06, + "litellm_provider": "gmi", + "max_input_tokens": 409600, + "max_output_tokens": 32000, + "max_tokens": 32000, + "mode": "chat", + "output_cost_per_token": 1.5e-05, + "supports_function_calling": true, + "supports_vision": true + }, + "gmi/anthropic/claude-sonnet-4": { + "input_cost_per_token": 3e-06, + "litellm_provider": "gmi", + "max_input_tokens": 409600, + "max_output_tokens": 32000, + "max_tokens": 32000, + "mode": "chat", + "output_cost_per_token": 1.5e-05, + "supports_function_calling": true, + "supports_vision": true + }, + "gmi/anthropic/claude-opus-4": { + "input_cost_per_token": 1.5e-05, + "litellm_provider": "gmi", + "max_input_tokens": 409600, + "max_output_tokens": 32000, + "max_tokens": 32000, + "mode": "chat", + "output_cost_per_token": 7.5e-05, + "supports_function_calling": true, + "supports_vision": true + }, + "gmi/openai/gpt-5.2": { + "input_cost_per_token": 1.75e-06, + "litellm_provider": "gmi", + "max_input_tokens": 409600, 
+ "max_output_tokens": 32000, + "max_tokens": 32000, + "mode": "chat", + "output_cost_per_token": 1.4e-05, + "supports_function_calling": true + }, + "gmi/openai/gpt-5.1": { + "input_cost_per_token": 1.25e-06, + "litellm_provider": "gmi", + "max_input_tokens": 409600, + "max_output_tokens": 32000, + "max_tokens": 32000, + "mode": "chat", + "output_cost_per_token": 1e-05, + "supports_function_calling": true + }, + "gmi/openai/gpt-5": { + "input_cost_per_token": 1.25e-06, + "litellm_provider": "gmi", + "max_input_tokens": 409600, + "max_output_tokens": 32000, + "max_tokens": 32000, + "mode": "chat", + "output_cost_per_token": 1e-05, + "supports_function_calling": true + }, + "gmi/openai/gpt-4o": { + "input_cost_per_token": 2.5e-06, + "litellm_provider": "gmi", + "max_input_tokens": 131072, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 1e-05, + "supports_function_calling": true, + "supports_vision": true + }, + "gmi/openai/gpt-4o-mini": { + "input_cost_per_token": 1.5e-07, + "litellm_provider": "gmi", + "max_input_tokens": 131072, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 6e-07, + "supports_function_calling": true, + "supports_vision": true + }, + "gmi/deepseek-ai/DeepSeek-V3.2": { + "input_cost_per_token": 2.8e-07, + "litellm_provider": "gmi", + "max_input_tokens": 163840, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 4e-07, + "supports_function_calling": true + }, + "gmi/deepseek-ai/DeepSeek-V3-0324": { + "input_cost_per_token": 2.8e-07, + "litellm_provider": "gmi", + "max_input_tokens": 163840, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 8.8e-07, + "supports_function_calling": true + }, + "gmi/google/gemini-3-pro-preview": { + "input_cost_per_token": 2e-06, + "litellm_provider": "gmi", + "max_input_tokens": 1048576, + "max_output_tokens": 65536, + 
"max_tokens": 65536, + "mode": "chat", + "output_cost_per_token": 1.2e-05, + "supports_function_calling": true, + "supports_vision": true + }, + "gmi/google/gemini-3-flash-preview": { + "input_cost_per_token": 5e-07, + "litellm_provider": "gmi", + "max_input_tokens": 1048576, + "max_output_tokens": 65536, + "max_tokens": 65536, + "mode": "chat", + "output_cost_per_token": 3e-06, + "supports_function_calling": true, + "supports_vision": true + }, + "gmi/moonshotai/Kimi-K2-Thinking": { + "input_cost_per_token": 8e-07, + "litellm_provider": "gmi", + "max_input_tokens": 262144, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 1.2e-06 + }, + "gmi/MiniMaxAI/MiniMax-M2.1": { + "input_cost_per_token": 3e-07, + "litellm_provider": "gmi", + "max_input_tokens": 196608, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 1.2e-06 + }, + "baseten/MiniMaxAI/MiniMax-M2.5": { + "input_cost_per_token": 3e-07, + "litellm_provider": "baseten", + "mode": "chat", + "output_cost_per_token": 1.2e-06 + }, + "baseten/nvidia/Nemotron-120B-A12B": { + "input_cost_per_token": 3e-07, + "litellm_provider": "baseten", + "mode": "chat", + "output_cost_per_token": 7.5e-07 + }, + "baseten/zai-org/GLM-5": { + "input_cost_per_token": 9.5e-07, + "litellm_provider": "baseten", + "mode": "chat", + "output_cost_per_token": 3.15e-06 + }, + "baseten/zai-org/GLM-4.7": { + "input_cost_per_token": 6e-07, + "litellm_provider": "baseten", + "mode": "chat", + "output_cost_per_token": 2.2e-06 + }, + "baseten/zai-org/GLM-4.6": { + "input_cost_per_token": 6e-07, + "litellm_provider": "baseten", + "mode": "chat", + "output_cost_per_token": 2.2e-06 + }, + "baseten/moonshotai/Kimi-K2.5": { + "input_cost_per_token": 6e-07, + "litellm_provider": "baseten", + "mode": "chat", + "output_cost_per_token": 3e-06 + }, + "baseten/moonshotai/Kimi-K2-Thinking": { + "input_cost_per_token": 6e-07, + "litellm_provider": "baseten", + "mode": 
"chat", + "output_cost_per_token": 2.5e-06 + }, + "baseten/moonshotai/Kimi-K2-Instruct-0905": { + "input_cost_per_token": 6e-07, + "litellm_provider": "baseten", + "mode": "chat", + "output_cost_per_token": 2.5e-06 + }, + "baseten/openai/gpt-oss-120b": { + "input_cost_per_token": 1e-07, + "litellm_provider": "baseten", + "mode": "chat", + "output_cost_per_token": 5e-07 + }, + "baseten/deepseek-ai/DeepSeek-V3.1": { + "input_cost_per_token": 5e-07, + "litellm_provider": "baseten", + "mode": "chat", + "output_cost_per_token": 1.5e-06 + }, + "baseten/deepseek-ai/DeepSeek-V3-0324": { + "input_cost_per_token": 7.7e-07, + "litellm_provider": "baseten", + "mode": "chat", + "output_cost_per_token": 7.7e-07 + }, + "gmi/Qwen/Qwen3-VL-235B-A22B-Instruct-FP8": { + "input_cost_per_token": 3e-07, + "litellm_provider": "gmi", + "max_input_tokens": 262144, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 1.4e-06, + "supports_vision": true + }, + "gmi/zai-org/GLM-4.7-FP8": { + "input_cost_per_token": 4e-07, + "litellm_provider": "gmi", + "max_input_tokens": 202752, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 2e-06 + }, + "google.gemma-3-12b-it": { + "input_cost_per_token": 9e-08, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 128000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 2.9e-07, + "supports_system_messages": true, + "supports_vision": true + }, + "google.gemma-3-27b-it": { + "input_cost_per_token": 2.3e-07, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 128000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 3.8e-07, + "supports_system_messages": true, + "supports_vision": true + }, + "google.gemma-3-4b-it": { + "input_cost_per_token": 4e-08, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 128000, + "max_output_tokens": 8192, + 
"max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 8e-08, + "supports_system_messages": true, + "supports_vision": true + }, + "google_pse/search": { + "input_cost_per_query": 0.005, + "litellm_provider": "google_pse", + "mode": "search" + }, + "global.anthropic.claude-sonnet-4-5-20250929-v1:0": { + "cache_creation_input_token_cost": 3.75e-06, + "cache_creation_input_token_cost_above_1hr": 6e-06, + "cache_read_input_token_cost": 3e-07, + "input_cost_per_token": 3e-06, + "input_cost_per_token_above_200k_tokens": 6e-06, + "output_cost_per_token_above_200k_tokens": 2.25e-05, + "cache_creation_input_token_cost_above_200k_tokens": 7.5e-06, + "cache_creation_input_token_cost_above_1hr_above_200k_tokens": 1.2e-05, + "cache_read_input_token_cost_above_200k_tokens": 6e-07, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 1.5e-05, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 346, + "supports_native_structured_output": true + }, + "global.anthropic.claude-sonnet-4-20250514-v1:0": { + "cache_creation_input_token_cost": 3.75e-06, + "cache_read_input_token_cost": 3e-07, + "input_cost_per_token": 3e-06, + "input_cost_per_token_above_200k_tokens": 6e-06, + "output_cost_per_token_above_200k_tokens": 2.25e-05, + "cache_creation_input_token_cost_above_200k_tokens": 7.5e-06, + "cache_read_input_token_cost_above_200k_tokens": 6e-07, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 1000000, + 
"max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 1.5e-05, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 159 + }, + "global.anthropic.claude-haiku-4-5-20251001-v1:0": { + "cache_creation_input_token_cost": 1.25e-06, + "cache_creation_input_token_cost_above_1hr": 2e-06, + "cache_read_input_token_cost": 1e-07, + "input_cost_per_token": 1e-06, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 5e-06, + "source": "https://aws.amazon.com/about-aws/whats-new/2025/10/claude-4-5-haiku-anthropic-amazon-bedrock", + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 346, + "supports_native_structured_output": true + }, + "global.amazon.nova-2-lite-v1:0": { + "cache_read_input_token_cost": 7.5e-08, + "input_cost_per_token": 3e-07, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 1000000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 2.5e-06, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_video_input": true, + 
"supports_vision": true + }, + "gpt-3.5-turbo": { + "input_cost_per_token": 5e-07, + "litellm_provider": "openai", + "max_input_tokens": 16385, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 1.5e-06, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "gpt-3.5-turbo-0125": { + "input_cost_per_token": 5e-07, + "litellm_provider": "openai", + "max_input_tokens": 16385, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 1.5e-06, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "gpt-3.5-turbo-1106": { + "deprecation_date": "2026-09-28", + "input_cost_per_token": 1e-06, + "litellm_provider": "openai", + "max_input_tokens": 16385, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 2e-06, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "gpt-3.5-turbo-16k": { + "input_cost_per_token": 3e-06, + "litellm_provider": "openai", + "max_input_tokens": 16385, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 4e-06, + "supports_prompt_caching": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "gpt-3.5-turbo-instruct": { + "input_cost_per_token": 1.5e-06, + "litellm_provider": "text-completion-openai", + "max_input_tokens": 8192, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "completion", + "output_cost_per_token": 2e-06 + }, + "gpt-3.5-turbo-instruct-0914": { + "input_cost_per_token": 1.5e-06, + "litellm_provider": "text-completion-openai", + "max_input_tokens": 8192, + 
"max_output_tokens": 4097, + "max_tokens": 4097, + "mode": "completion", + "output_cost_per_token": 2e-06 + }, + "gpt-4": { + "input_cost_per_token": 3e-05, + "litellm_provider": "openai", + "max_input_tokens": 8192, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 6e-05, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "gpt-4-0125-preview": { + "deprecation_date": "2026-03-26", + "input_cost_per_token": 1e-05, + "litellm_provider": "openai", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 3e-05, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "gpt-4-0314": { + "deprecation_date": "2026-03-26", + "input_cost_per_token": 3e-05, + "litellm_provider": "openai", + "max_input_tokens": 8192, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 6e-05, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "gpt-4-0613": { + "deprecation_date": "2025-06-06", + "input_cost_per_token": 3e-05, + "litellm_provider": "openai", + "max_input_tokens": 8192, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 6e-05, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "gpt-4-1106-preview": { + "deprecation_date": "2026-03-26", + "input_cost_per_token": 1e-05, + "litellm_provider": "openai", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 3e-05, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + 
"supports_prompt_caching": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "gpt-4-turbo": { + "input_cost_per_token": 1e-05, + "litellm_provider": "openai", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 3e-05, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "gpt-4-turbo-2024-04-09": { + "input_cost_per_token": 1e-05, + "litellm_provider": "openai", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 3e-05, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "gpt-4-turbo-preview": { + "input_cost_per_token": 1e-05, + "litellm_provider": "openai", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 3e-05, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "gpt-4.1": { + "cache_read_input_token_cost": 5e-07, + "cache_read_input_token_cost_priority": 8.75e-07, + "input_cost_per_token": 2e-06, + "input_cost_per_token_batches": 1e-06, + "input_cost_per_token_priority": 3.5e-06, + "litellm_provider": "openai", + "max_input_tokens": 1047576, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 8e-06, + "output_cost_per_token_batches": 4e-06, + "output_cost_per_token_priority": 1.4e-05, + "supported_endpoints": [ + 
"/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_service_tier": true, + "supports_vision": true, + "supports_web_search": true + }, + "gpt-4.1-2025-04-14": { + "cache_read_input_token_cost": 5e-07, + "input_cost_per_token": 2e-06, + "input_cost_per_token_batches": 1e-06, + "litellm_provider": "openai", + "max_input_tokens": 1047576, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 8e-06, + "output_cost_per_token_batches": 4e-06, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_service_tier": true, + "supports_vision": true, + "supports_web_search": true + }, + "gpt-4.1-mini": { + "cache_read_input_token_cost": 1e-07, + "cache_read_input_token_cost_priority": 1.75e-07, + "input_cost_per_token": 4e-07, + "input_cost_per_token_batches": 2e-07, + "input_cost_per_token_priority": 7e-07, + "litellm_provider": "openai", + "max_input_tokens": 1047576, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 1.6e-06, + "output_cost_per_token_batches": 8e-07, + "output_cost_per_token_priority": 2.8e-06, + "supported_endpoints": [ + 
"/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_service_tier": true, + "supports_vision": true, + "supports_web_search": true + }, + "gpt-4.1-mini-2025-04-14": { + "cache_read_input_token_cost": 1e-07, + "input_cost_per_token": 4e-07, + "input_cost_per_token_batches": 2e-07, + "litellm_provider": "openai", + "max_input_tokens": 1047576, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 1.6e-06, + "output_cost_per_token_batches": 8e-07, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_service_tier": true, + "supports_vision": true, + "supports_web_search": true + }, + "gpt-4.1-nano": { + "cache_read_input_token_cost": 2.5e-08, + "cache_read_input_token_cost_priority": 5e-08, + "input_cost_per_token": 1e-07, + "input_cost_per_token_batches": 5e-08, + "input_cost_per_token_priority": 2e-07, + "litellm_provider": "openai", + "max_input_tokens": 1047576, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 4e-07, + "output_cost_per_token_batches": 2e-07, + "output_cost_per_token_priority": 8e-07, + "supported_endpoints": [ + 
"/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_service_tier": true, + "supports_vision": true + }, + "gpt-4.1-nano-2025-04-14": { + "cache_read_input_token_cost": 2.5e-08, + "input_cost_per_token": 1e-07, + "input_cost_per_token_batches": 5e-08, + "litellm_provider": "openai", + "max_input_tokens": 1047576, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 4e-07, + "output_cost_per_token_batches": 2e-07, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_service_tier": true, + "supports_vision": true + }, + "gpt-4o": { + "cache_read_input_token_cost": 1.25e-06, + "cache_read_input_token_cost_priority": 2.125e-06, + "input_cost_per_token": 2.5e-06, + "input_cost_per_token_batches": 1.25e-06, + "input_cost_per_token_priority": 4.25e-06, + "litellm_provider": "openai", + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 1e-05, + "output_cost_per_token_batches": 5e-06, + "output_cost_per_token_priority": 1.7e-05, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + 
"supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_service_tier": true, + "supports_vision": true + }, + "gpt-4o-2024-05-13": { + "input_cost_per_token": 5e-06, + "input_cost_per_token_batches": 2.5e-06, + "input_cost_per_token_priority": 8.75e-06, + "litellm_provider": "openai", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 1.5e-05, + "output_cost_per_token_batches": 7.5e-06, + "output_cost_per_token_priority": 2.625e-05, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "gpt-4o-2024-08-06": { + "cache_read_input_token_cost": 1.25e-06, + "input_cost_per_token": 2.5e-06, + "input_cost_per_token_batches": 1.25e-06, + "litellm_provider": "openai", + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 1e-05, + "output_cost_per_token_batches": 5e-06, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_service_tier": true, + "supports_vision": true + }, + "gpt-4o-2024-11-20": { + "cache_read_input_token_cost": 1.25e-06, + "input_cost_per_token": 2.5e-06, + "input_cost_per_token_batches": 1.25e-06, + "litellm_provider": "openai", + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 1e-05, + "output_cost_per_token_batches": 5e-06, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + 
"supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_service_tier": true, + "supports_vision": true + }, + "gpt-4o-audio-preview": { + "input_cost_per_audio_token": 4e-05, + "input_cost_per_token": 2.5e-06, + "litellm_provider": "openai", + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_audio_token": 8e-05, + "output_cost_per_token": 1e-05, + "supports_audio_input": true, + "supports_audio_output": true, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "gpt-4o-audio-preview-2024-12-17": { + "input_cost_per_audio_token": 4e-05, + "input_cost_per_token": 2.5e-06, + "litellm_provider": "openai", + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_audio_token": 8e-05, + "output_cost_per_token": 1e-05, + "supports_audio_input": true, + "supports_audio_output": true, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "gpt-4o-audio-preview-2025-06-03": { + "input_cost_per_audio_token": 4e-05, + "input_cost_per_token": 2.5e-06, + "litellm_provider": "openai", + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_audio_token": 8e-05, + "output_cost_per_token": 1e-05, + "supports_audio_input": true, + "supports_audio_output": true, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "gpt-audio": { + "input_cost_per_audio_token": 3.2e-05, + "input_cost_per_token": 2.5e-06, + "litellm_provider": "openai", + "max_input_tokens": 128000, + 
"max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_audio_token": 6.4e-05, + "output_cost_per_token": 1e-05, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/responses", + "/v1/realtime", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "audio" + ], + "supported_output_modalities": [ + "text", + "audio" + ], + "supports_audio_input": true, + "supports_audio_output": true, + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": false, + "supports_reasoning": false, + "supports_response_schema": false, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": false + }, + "gpt-audio-1.5": { + "input_cost_per_audio_token": 3.2e-05, + "input_cost_per_token": 2.5e-06, + "litellm_provider": "openai", + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_audio_token": 6.4e-05, + "output_cost_per_token": 1e-05, + "supported_endpoints": [ + "/v1/chat/completions" + ], + "supported_modalities": [ + "text", + "audio" + ], + "supported_output_modalities": [ + "text", + "audio" + ], + "supports_audio_input": true, + "supports_audio_output": true, + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": false, + "supports_reasoning": false, + "supports_response_schema": false, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": false + }, + "gpt-audio-2025-08-28": { + "input_cost_per_audio_token": 3.2e-05, + "input_cost_per_token": 2.5e-06, + "litellm_provider": "openai", + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_audio_token": 6.4e-05, + "output_cost_per_token": 1e-05, + "supported_endpoints": [ + "/v1/chat/completions", + 
"/v1/responses", + "/v1/realtime", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "audio" + ], + "supported_output_modalities": [ + "text", + "audio" + ], + "supports_audio_input": true, + "supports_audio_output": true, + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": false, + "supports_reasoning": false, + "supports_response_schema": false, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": false + }, + "gpt-audio-mini": { + "input_cost_per_audio_token": 1e-05, + "input_cost_per_token": 6e-07, + "litellm_provider": "openai", + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_audio_token": 2e-05, + "output_cost_per_token": 2.4e-06, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/responses", + "/v1/realtime", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "audio" + ], + "supported_output_modalities": [ + "text", + "audio" + ], + "supports_audio_input": true, + "supports_audio_output": true, + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": false, + "supports_reasoning": false, + "supports_response_schema": false, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": false + }, + "gpt-audio-mini-2025-10-06": { + "input_cost_per_audio_token": 1e-05, + "input_cost_per_token": 6e-07, + "litellm_provider": "openai", + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_audio_token": 2e-05, + "output_cost_per_token": 2.4e-06, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/responses", + "/v1/realtime", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "audio" + ], + "supported_output_modalities": [ + "text", + 
"audio" + ], + "supports_audio_input": true, + "supports_audio_output": true, + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": false, + "supports_reasoning": false, + "supports_response_schema": false, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": false + }, + "gpt-audio-mini-2025-12-15": { + "input_cost_per_audio_token": 1e-05, + "input_cost_per_token": 6e-07, + "litellm_provider": "openai", + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_audio_token": 2e-05, + "output_cost_per_token": 2.4e-06, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/responses", + "/v1/realtime", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "audio" + ], + "supported_output_modalities": [ + "text", + "audio" + ], + "supports_audio_input": true, + "supports_audio_output": true, + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": false, + "supports_reasoning": false, + "supports_response_schema": false, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": false + }, + "gpt-4o-mini": { + "cache_read_input_token_cost": 7.5e-08, + "cache_read_input_token_cost_priority": 1.25e-07, + "input_cost_per_token": 1.5e-07, + "input_cost_per_token_batches": 7.5e-08, + "input_cost_per_token_priority": 2.5e-07, + "litellm_provider": "openai", + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 6e-07, + "output_cost_per_token_batches": 3e-07, + "output_cost_per_token_priority": 1e-06, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + 
"supports_system_messages": true, + "supports_tool_choice": true, + "supports_service_tier": true, + "supports_vision": true + }, + "gpt-4o-mini-2024-07-18": { + "cache_read_input_token_cost": 7.5e-08, + "input_cost_per_token": 1.5e-07, + "input_cost_per_token_batches": 7.5e-08, + "litellm_provider": "openai", + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 6e-07, + "output_cost_per_token_batches": 3e-07, + "search_context_cost_per_query": { + "search_context_size_high": 0.03, + "search_context_size_low": 0.025, + "search_context_size_medium": 0.0275 + }, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_service_tier": true, + "supports_vision": true + }, + "gpt-4o-mini-audio-preview": { + "input_cost_per_audio_token": 1e-05, + "input_cost_per_token": 1.5e-07, + "litellm_provider": "openai", + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_audio_token": 2e-05, + "output_cost_per_token": 6e-07, + "supports_audio_input": true, + "supports_audio_output": true, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "gpt-4o-mini-audio-preview-2024-12-17": { + "input_cost_per_audio_token": 1e-05, + "input_cost_per_token": 1.5e-07, + "litellm_provider": "openai", + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_audio_token": 2e-05, + "output_cost_per_token": 6e-07, + "supports_audio_input": true, + "supports_audio_output": true, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_system_messages": 
true, + "supports_tool_choice": true + }, + "gpt-4o-mini-realtime-preview": { + "cache_creation_input_audio_token_cost": 3e-07, + "cache_read_input_token_cost": 3e-07, + "input_cost_per_audio_token": 1e-05, + "input_cost_per_token": 6e-07, + "litellm_provider": "openai", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_audio_token": 2e-05, + "output_cost_per_token": 2.4e-06, + "supports_audio_input": true, + "supports_audio_output": true, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "gpt-4o-mini-realtime-preview-2024-12-17": { + "cache_creation_input_audio_token_cost": 3e-07, + "cache_read_input_token_cost": 3e-07, + "input_cost_per_audio_token": 1e-05, + "input_cost_per_token": 6e-07, + "litellm_provider": "openai", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_audio_token": 2e-05, + "output_cost_per_token": 2.4e-06, + "supports_audio_input": true, + "supports_audio_output": true, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "gpt-4o-mini-search-preview": { + "cache_read_input_token_cost": 7.5e-08, + "input_cost_per_token": 1.5e-07, + "input_cost_per_token_batches": 7.5e-08, + "litellm_provider": "openai", + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 6e-07, + "output_cost_per_token_batches": 3e-07, + "search_context_cost_per_query": { + "search_context_size_high": 0.03, + "search_context_size_low": 0.025, + "search_context_size_medium": 0.0275 + }, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_response_schema": 
true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_web_search": true + }, + "gpt-4o-mini-search-preview-2025-03-11": { + "cache_read_input_token_cost": 7.5e-08, + "input_cost_per_token": 1.5e-07, + "input_cost_per_token_batches": 7.5e-08, + "litellm_provider": "openai", + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 6e-07, + "output_cost_per_token_batches": 3e-07, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "gpt-4o-mini-transcribe": { + "input_cost_per_audio_token": 3e-06, + "input_cost_per_token": 1.25e-06, + "litellm_provider": "openai", + "max_input_tokens": 16000, + "max_output_tokens": 2000, + "mode": "audio_transcription", + "output_cost_per_token": 5e-06, + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] + }, + "gpt-4o-mini-tts": { + "input_cost_per_token": 2.5e-06, + "litellm_provider": "openai", + "mode": "audio_speech", + "output_cost_per_audio_token": 1.2e-05, + "output_cost_per_second": 0.00025, + "output_cost_per_token": 1e-05, + "supported_endpoints": [ + "/v1/audio/speech" + ], + "supported_modalities": [ + "text", + "audio" + ], + "supported_output_modalities": [ + "audio" + ] + }, + "gpt-4o-realtime-preview": { + "cache_read_input_token_cost": 2.5e-06, + "input_cost_per_audio_token": 4e-05, + "input_cost_per_token": 5e-06, + "litellm_provider": "openai", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_audio_token": 8e-05, + "output_cost_per_token": 2e-05, + "supports_audio_input": true, + "supports_audio_output": true, + "supports_function_calling": true, + 
"supports_parallel_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "gpt-4o-realtime-preview-2024-12-17": { + "cache_read_input_token_cost": 2.5e-06, + "input_cost_per_audio_token": 4e-05, + "input_cost_per_token": 5e-06, + "litellm_provider": "openai", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_audio_token": 8e-05, + "output_cost_per_token": 2e-05, + "supports_audio_input": true, + "supports_audio_output": true, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "gpt-4o-realtime-preview-2025-06-03": { + "cache_read_input_token_cost": 2.5e-06, + "input_cost_per_audio_token": 4e-05, + "input_cost_per_token": 5e-06, + "litellm_provider": "openai", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_audio_token": 8e-05, + "output_cost_per_token": 2e-05, + "supports_audio_input": true, + "supports_audio_output": true, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "gpt-4o-search-preview": { + "cache_read_input_token_cost": 1.25e-06, + "input_cost_per_token": 2.5e-06, + "input_cost_per_token_batches": 1.25e-06, + "litellm_provider": "openai", + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 1e-05, + "output_cost_per_token_batches": 5e-06, + "search_context_cost_per_query": { + "search_context_size_high": 0.05, + "search_context_size_low": 0.03, + "search_context_size_medium": 0.035 + }, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + 
"supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_web_search": true + }, + "gpt-4o-search-preview-2025-03-11": { + "cache_read_input_token_cost": 1.25e-06, + "input_cost_per_token": 2.5e-06, + "input_cost_per_token_batches": 1.25e-06, + "litellm_provider": "openai", + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 1e-05, + "output_cost_per_token_batches": 5e-06, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "gpt-4o-transcribe": { + "input_cost_per_audio_token": 6e-06, + "input_cost_per_token": 2.5e-06, + "litellm_provider": "openai", + "max_input_tokens": 16000, + "max_output_tokens": 2000, + "mode": "audio_transcription", + "output_cost_per_token": 1e-05, + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] + }, + "gpt-image-1.5": { + "cache_read_input_image_token_cost": 2e-06, + "cache_read_input_token_cost": 1.25e-06, + "input_cost_per_token": 5e-06, + "litellm_provider": "openai", + "mode": "image_generation", + "output_cost_per_token": 1e-05, + "input_cost_per_image_token": 8e-06, + "output_cost_per_image_token": 3.2e-05, + "supported_endpoints": [ + "/v1/images/generations" + ], + "supports_vision": true, + "supports_pdf_input": true + }, + "gpt-image-1.5-2025-12-16": { + "cache_read_input_image_token_cost": 2e-06, + "cache_read_input_token_cost": 1.25e-06, + "input_cost_per_token": 5e-06, + "litellm_provider": "openai", + "mode": "image_generation", + "output_cost_per_token": 1e-05, + "input_cost_per_image_token": 8e-06, + "output_cost_per_image_token": 3.2e-05, + "supported_endpoints": [ + "/v1/images/generations" + ], + "supports_vision": true, + "supports_pdf_input": true + }, + 
"gpt-image-2": { + "cache_read_input_image_token_cost": 2e-06, + "cache_read_input_token_cost": 1.25e-06, + "input_cost_per_token": 5e-06, + "litellm_provider": "openai", + "mode": "image_generation", + "output_cost_per_token": 1e-05, + "input_cost_per_image_token": 8e-06, + "output_cost_per_image_token": 3e-05, + "supported_endpoints": [ + "/v1/images/generations", + "/v1/images/edits" + ], + "supports_vision": true, + "supports_pdf_input": true + }, + "gpt-image-2-2026-04-21": { + "cache_read_input_image_token_cost": 2e-06, + "cache_read_input_token_cost": 1.25e-06, + "input_cost_per_token": 5e-06, + "litellm_provider": "openai", + "mode": "image_generation", + "output_cost_per_token": 1e-05, + "input_cost_per_image_token": 8e-06, + "output_cost_per_image_token": 3e-05, + "supported_endpoints": [ + "/v1/images/generations", + "/v1/images/edits" + ], + "supports_vision": true, + "supports_pdf_input": true + }, + "low/1024-x-1024/gpt-image-1.5": { + "input_cost_per_image": 0.009, + "litellm_provider": "openai", + "mode": "image_generation", + "supported_endpoints": [ + "/v1/images/generations", + "/v1/images/edits" + ], + "supports_vision": true, + "supports_pdf_input": true + }, + "low/1024-x-1536/gpt-image-1.5": { + "input_cost_per_image": 0.013, + "litellm_provider": "openai", + "mode": "image_generation", + "supported_endpoints": [ + "/v1/images/generations", + "/v1/images/edits" + ], + "supports_vision": true, + "supports_pdf_input": true + }, + "low/1536-x-1024/gpt-image-1.5": { + "input_cost_per_image": 0.013, + "litellm_provider": "openai", + "mode": "image_generation", + "supported_endpoints": [ + "/v1/images/generations", + "/v1/images/edits" + ], + "supports_vision": true, + "supports_pdf_input": true + }, + "medium/1024-x-1024/gpt-image-1.5": { + "input_cost_per_image": 0.034, + "litellm_provider": "openai", + "mode": "image_generation", + "supported_endpoints": [ + "/v1/images/generations", + "/v1/images/edits" + ], + "supports_vision": true, + 
"supports_pdf_input": true + }, + "medium/1024-x-1536/gpt-image-1.5": { + "input_cost_per_image": 0.05, + "litellm_provider": "openai", + "mode": "image_generation", + "supported_endpoints": [ + "/v1/images/generations", + "/v1/images/edits" + ], + "supports_vision": true, + "supports_pdf_input": true + }, + "medium/1536-x-1024/gpt-image-1.5": { + "input_cost_per_image": 0.05, + "litellm_provider": "openai", + "mode": "image_generation", + "supported_endpoints": [ + "/v1/images/generations", + "/v1/images/edits" + ], + "supports_vision": true, + "supports_pdf_input": true + }, + "high/1024-x-1024/gpt-image-1.5": { + "input_cost_per_image": 0.133, + "litellm_provider": "openai", + "mode": "image_generation", + "supported_endpoints": [ + "/v1/images/generations", + "/v1/images/edits" + ], + "supports_vision": true, + "supports_pdf_input": true + }, + "high/1024-x-1536/gpt-image-1.5": { + "input_cost_per_image": 0.2, + "litellm_provider": "openai", + "mode": "image_generation", + "supported_endpoints": [ + "/v1/images/generations", + "/v1/images/edits" + ], + "supports_vision": true, + "supports_pdf_input": true + }, + "high/1536-x-1024/gpt-image-1.5": { + "input_cost_per_image": 0.2, + "litellm_provider": "openai", + "mode": "image_generation", + "supported_endpoints": [ + "/v1/images/generations", + "/v1/images/edits" + ], + "supports_vision": true, + "supports_pdf_input": true + }, + "standard/1024-x-1024/gpt-image-1.5": { + "input_cost_per_image": 0.009, + "litellm_provider": "openai", + "mode": "image_generation", + "supported_endpoints": [ + "/v1/images/generations", + "/v1/images/edits" + ], + "supports_vision": true, + "supports_pdf_input": true + }, + "standard/1024-x-1536/gpt-image-1.5": { + "input_cost_per_image": 0.013, + "litellm_provider": "openai", + "mode": "image_generation", + "supported_endpoints": [ + "/v1/images/generations", + "/v1/images/edits" + ], + "supports_vision": true, + "supports_pdf_input": true + }, + 
"standard/1536-x-1024/gpt-image-1.5": { + "input_cost_per_image": 0.013, + "litellm_provider": "openai", + "mode": "image_generation", + "supported_endpoints": [ + "/v1/images/generations", + "/v1/images/edits" + ], + "supports_vision": true, + "supports_pdf_input": true + }, + "1024-x-1024/gpt-image-1.5": { + "input_cost_per_image": 0.009, + "litellm_provider": "openai", + "mode": "image_generation", + "supported_endpoints": [ + "/v1/images/generations", + "/v1/images/edits" + ], + "supports_vision": true, + "supports_pdf_input": true + }, + "1024-x-1536/gpt-image-1.5": { + "input_cost_per_image": 0.013, + "litellm_provider": "openai", + "mode": "image_generation", + "supported_endpoints": [ + "/v1/images/generations", + "/v1/images/edits" + ], + "supports_vision": true, + "supports_pdf_input": true + }, + "1536-x-1024/gpt-image-1.5": { + "input_cost_per_image": 0.013, + "litellm_provider": "openai", + "mode": "image_generation", + "supported_endpoints": [ + "/v1/images/generations", + "/v1/images/edits" + ], + "supports_vision": true, + "supports_pdf_input": true + }, + "low/1024-x-1024/gpt-image-1.5-2025-12-16": { + "input_cost_per_image": 0.009, + "litellm_provider": "openai", + "mode": "image_generation", + "supported_endpoints": [ + "/v1/images/generations", + "/v1/images/edits" + ], + "supports_vision": true, + "supports_pdf_input": true + }, + "low/1024-x-1536/gpt-image-1.5-2025-12-16": { + "input_cost_per_image": 0.013, + "litellm_provider": "openai", + "mode": "image_generation", + "supported_endpoints": [ + "/v1/images/generations", + "/v1/images/edits" + ], + "supports_vision": true, + "supports_pdf_input": true + }, + "low/1536-x-1024/gpt-image-1.5-2025-12-16": { + "input_cost_per_image": 0.013, + "litellm_provider": "openai", + "mode": "image_generation", + "supported_endpoints": [ + "/v1/images/generations", + "/v1/images/edits" + ], + "supports_vision": true, + "supports_pdf_input": true + }, + "medium/1024-x-1024/gpt-image-1.5-2025-12-16": { + 
"input_cost_per_image": 0.034, + "litellm_provider": "openai", + "mode": "image_generation", + "supported_endpoints": [ + "/v1/images/generations", + "/v1/images/edits" + ], + "supports_vision": true, + "supports_pdf_input": true + }, + "medium/1024-x-1536/gpt-image-1.5-2025-12-16": { + "input_cost_per_image": 0.05, + "litellm_provider": "openai", + "mode": "image_generation", + "supported_endpoints": [ + "/v1/images/generations", + "/v1/images/edits" + ], + "supports_vision": true, + "supports_pdf_input": true + }, + "medium/1536-x-1024/gpt-image-1.5-2025-12-16": { + "input_cost_per_image": 0.05, + "litellm_provider": "openai", + "mode": "image_generation", + "supported_endpoints": [ + "/v1/images/generations", + "/v1/images/edits" + ], + "supports_vision": true, + "supports_pdf_input": true + }, + "high/1024-x-1024/gpt-image-1.5-2025-12-16": { + "input_cost_per_image": 0.133, + "litellm_provider": "openai", + "mode": "image_generation", + "supported_endpoints": [ + "/v1/images/generations", + "/v1/images/edits" + ], + "supports_vision": true, + "supports_pdf_input": true + }, + "high/1024-x-1536/gpt-image-1.5-2025-12-16": { + "input_cost_per_image": 0.2, + "litellm_provider": "openai", + "mode": "image_generation", + "supported_endpoints": [ + "/v1/images/generations", + "/v1/images/edits" + ], + "supports_vision": true, + "supports_pdf_input": true + }, + "high/1536-x-1024/gpt-image-1.5-2025-12-16": { + "input_cost_per_image": 0.2, + "litellm_provider": "openai", + "mode": "image_generation", + "supported_endpoints": [ + "/v1/images/generations", + "/v1/images/edits" + ], + "supports_vision": true, + "supports_pdf_input": true + }, + "standard/1024-x-1024/gpt-image-1.5-2025-12-16": { + "input_cost_per_image": 0.009, + "litellm_provider": "openai", + "mode": "image_generation", + "supported_endpoints": [ + "/v1/images/generations", + "/v1/images/edits" + ], + "supports_vision": true, + "supports_pdf_input": true + }, + 
"standard/1024-x-1536/gpt-image-1.5-2025-12-16": { + "input_cost_per_image": 0.013, + "litellm_provider": "openai", + "mode": "image_generation", + "supported_endpoints": [ + "/v1/images/generations", + "/v1/images/edits" + ], + "supports_vision": true, + "supports_pdf_input": true + }, + "standard/1536-x-1024/gpt-image-1.5-2025-12-16": { + "input_cost_per_image": 0.013, + "litellm_provider": "openai", + "mode": "image_generation", + "supported_endpoints": [ + "/v1/images/generations", + "/v1/images/edits" + ], + "supports_vision": true, + "supports_pdf_input": true + }, + "1024-x-1024/gpt-image-1.5-2025-12-16": { + "input_cost_per_image": 0.009, + "litellm_provider": "openai", + "mode": "image_generation", + "supported_endpoints": [ + "/v1/images/generations", + "/v1/images/edits" + ], + "supports_vision": true, + "supports_pdf_input": true + }, + "1024-x-1536/gpt-image-1.5-2025-12-16": { + "input_cost_per_image": 0.013, + "litellm_provider": "openai", + "mode": "image_generation", + "supported_endpoints": [ + "/v1/images/generations", + "/v1/images/edits" + ], + "supports_vision": true, + "supports_pdf_input": true + }, + "1536-x-1024/gpt-image-1.5-2025-12-16": { + "input_cost_per_image": 0.013, + "litellm_provider": "openai", + "mode": "image_generation", + "supported_endpoints": [ + "/v1/images/generations", + "/v1/images/edits" + ], + "supports_vision": true, + "supports_pdf_input": true + }, + "gpt-5": { + "cache_read_input_token_cost": 1.25e-07, + "cache_read_input_token_cost_flex": 6.25e-08, + "cache_read_input_token_cost_priority": 2.5e-07, + "input_cost_per_token": 1.25e-06, + "input_cost_per_token_flex": 6.25e-07, + "input_cost_per_token_priority": 2.5e-06, + "litellm_provider": "openai", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 1e-05, + "output_cost_per_token_flex": 5e-06, + "output_cost_per_token_priority": 2e-05, + "supported_endpoints": [ + 
"/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_service_tier": true, + "supports_vision": true, + "supports_web_search": true, + "supports_none_reasoning_effort": false, + "supports_xhigh_reasoning_effort": false, + "supports_minimal_reasoning_effort": true + }, + "gpt-5.1": { + "cache_read_input_token_cost": 1.25e-07, + "cache_read_input_token_cost_priority": 2.5e-07, + "input_cost_per_token": 1.25e-06, + "input_cost_per_token_priority": 2.5e-06, + "litellm_provider": "openai", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 1e-05, + "output_cost_per_token_priority": 2e-05, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text", + "image" + ], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_service_tier": true, + "supports_vision": true, + "supports_web_search": true, + "supports_none_reasoning_effort": true, + "supports_xhigh_reasoning_effort": false, + "supports_minimal_reasoning_effort": true + }, + "gpt-5.1-2025-11-13": { + "cache_read_input_token_cost": 1.25e-07, + "cache_read_input_token_cost_priority": 2.5e-07, + "input_cost_per_token": 
1.25e-06, + "input_cost_per_token_priority": 2.5e-06, + "litellm_provider": "openai", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 1e-05, + "output_cost_per_token_priority": 2e-05, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text", + "image" + ], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_service_tier": true, + "supports_vision": true, + "supports_web_search": true, + "supports_none_reasoning_effort": true, + "supports_xhigh_reasoning_effort": false, + "supports_minimal_reasoning_effort": true + }, + "gpt-5.1-chat-latest": { + "cache_read_input_token_cost": 1.25e-07, + "cache_read_input_token_cost_priority": 2.5e-07, + "input_cost_per_token": 1.25e-06, + "input_cost_per_token_priority": 2.5e-06, + "litellm_provider": "openai", + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 1e-05, + "output_cost_per_token_priority": 2e-05, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text", + "image" + ], + "supports_function_calling": false, + "supports_native_streaming": true, + "supports_parallel_function_calling": false, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": false, + "supports_vision": true, + "supports_web_search": true, + 
"supports_none_reasoning_effort": true, + "supports_xhigh_reasoning_effort": false, + "supports_minimal_reasoning_effort": true + }, + "gpt-5.2": { + "cache_read_input_token_cost": 1.75e-07, + "cache_read_input_token_cost_priority": 3.5e-07, + "input_cost_per_token": 1.75e-06, + "input_cost_per_token_priority": 3.5e-06, + "litellm_provider": "openai", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 1.4e-05, + "output_cost_per_token_priority": 2.8e-05, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text", + "image" + ], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_service_tier": true, + "supports_vision": true, + "supports_web_search": true, + "supports_none_reasoning_effort": true, + "supports_xhigh_reasoning_effort": true, + "supports_minimal_reasoning_effort": true + }, + "gpt-5.2-2025-12-11": { + "cache_read_input_token_cost": 1.75e-07, + "cache_read_input_token_cost_priority": 3.5e-07, + "input_cost_per_token": 1.75e-06, + "input_cost_per_token_priority": 3.5e-06, + "litellm_provider": "openai", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 1.4e-05, + "output_cost_per_token_priority": 2.8e-05, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text", + "image" + ], + "supports_function_calling": true, + "supports_native_streaming": true, + 
"supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_service_tier": true, + "supports_vision": true, + "supports_web_search": true, + "supports_none_reasoning_effort": true, + "supports_xhigh_reasoning_effort": true, + "supports_minimal_reasoning_effort": true + }, + "gpt-5.2-chat-latest": { + "cache_read_input_token_cost": 1.75e-07, + "cache_read_input_token_cost_priority": 3.5e-07, + "input_cost_per_token": 1.75e-06, + "input_cost_per_token_priority": 3.5e-06, + "litellm_provider": "openai", + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 1.4e-05, + "output_cost_per_token_priority": 2.8e-05, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_web_search": true, + "supports_none_reasoning_effort": false, + "supports_xhigh_reasoning_effort": false, + "supports_minimal_reasoning_effort": true + }, + "gpt-5.3-chat-latest": { + "cache_read_input_token_cost": 1.75e-07, + "cache_read_input_token_cost_priority": 3.5e-07, + "input_cost_per_token": 1.75e-06, + "input_cost_per_token_priority": 3.5e-06, + "litellm_provider": "openai", + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 1.4e-05, + "output_cost_per_token_priority": 2.8e-05, + 
"supported_endpoints": [ + "/v1/chat/completions", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_web_search": true, + "supports_none_reasoning_effort": false, + "supports_xhigh_reasoning_effort": false, + "supports_minimal_reasoning_effort": true + }, + "gpt-5.2-pro": { + "input_cost_per_token": 2.1e-05, + "litellm_provider": "openai", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "responses", + "output_cost_per_token": 0.000168, + "supported_endpoints": [ + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_web_search": true, + "supports_none_reasoning_effort": false, + "supports_xhigh_reasoning_effort": true, + "supports_minimal_reasoning_effort": true + }, + "gpt-5.2-pro-2025-12-11": { + "input_cost_per_token": 2.1e-05, + "litellm_provider": "openai", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "responses", + "output_cost_per_token": 0.000168, + "supported_endpoints": [ + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + 
"text" + ], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_web_search": true, + "supports_none_reasoning_effort": false, + "supports_xhigh_reasoning_effort": true, + "supports_minimal_reasoning_effort": true + }, + "gpt-5.5": { + "cache_read_input_token_cost": 5e-07, + "cache_read_input_token_cost_above_272k_tokens": 1e-06, + "cache_read_input_token_cost_flex": 2.5e-07, + "cache_read_input_token_cost_priority": 1e-06, + "input_cost_per_token": 5e-06, + "input_cost_per_token_above_272k_tokens": 1e-05, + "input_cost_per_token_flex": 2.5e-06, + "input_cost_per_token_batches": 2.5e-06, + "input_cost_per_token_priority": 1e-05, + "litellm_provider": "openai", + "max_input_tokens": 1050000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 3e-05, + "output_cost_per_token_above_272k_tokens": 4.5e-05, + "output_cost_per_token_flex": 1.5e-05, + "output_cost_per_token_batches": 1.5e-05, + "output_cost_per_token_priority": 6e-05, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_service_tier": true, + "supports_vision": true, + "supports_web_search": true, + "supports_none_reasoning_effort": true, + "supports_xhigh_reasoning_effort": true, + 
"supports_minimal_reasoning_effort": false + }, + "gpt-5.5-2026-04-23": { + "cache_read_input_token_cost": 5e-07, + "cache_read_input_token_cost_above_272k_tokens": 1e-06, + "cache_read_input_token_cost_flex": 2.5e-07, + "cache_read_input_token_cost_priority": 1e-06, + "input_cost_per_token": 5e-06, + "input_cost_per_token_above_272k_tokens": 1e-05, + "input_cost_per_token_flex": 2.5e-06, + "input_cost_per_token_batches": 2.5e-06, + "input_cost_per_token_priority": 1e-05, + "litellm_provider": "openai", + "max_input_tokens": 1050000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 3e-05, + "output_cost_per_token_above_272k_tokens": 4.5e-05, + "output_cost_per_token_flex": 1.5e-05, + "output_cost_per_token_batches": 1.5e-05, + "output_cost_per_token_priority": 6e-05, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_service_tier": true, + "supports_vision": true, + "supports_web_search": true, + "supports_none_reasoning_effort": true, + "supports_xhigh_reasoning_effort": true, + "supports_minimal_reasoning_effort": false + }, + "gpt-5.5-pro": { + "cache_read_input_token_cost": 3e-06, + "cache_read_input_token_cost_above_272k_tokens": 6e-06, + "input_cost_per_token": 3e-05, + "input_cost_per_token_above_272k_tokens": 6e-05, + "input_cost_per_token_flex": 1.5e-05, + "input_cost_per_token_batches": 1.5e-05, + "litellm_provider": "openai", + "max_input_tokens": 1050000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "responses", + 
"output_cost_per_token": 0.00018, + "output_cost_per_token_above_272k_tokens": 0.00027, + "output_cost_per_token_flex": 9e-05, + "output_cost_per_token_batches": 9e-05, + "supported_endpoints": [ + "/v1/responses", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": false, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_service_tier": true, + "supports_vision": true, + "supports_web_search": true, + "supports_none_reasoning_effort": false, + "supports_xhigh_reasoning_effort": true, + "supports_minimal_reasoning_effort": false, + "supports_low_reasoning_effort": false + }, + "gpt-5.5-pro-2026-04-23": { + "cache_read_input_token_cost": 3e-06, + "cache_read_input_token_cost_above_272k_tokens": 6e-06, + "input_cost_per_token": 3e-05, + "input_cost_per_token_above_272k_tokens": 6e-05, + "input_cost_per_token_flex": 1.5e-05, + "input_cost_per_token_batches": 1.5e-05, + "litellm_provider": "openai", + "max_input_tokens": 1050000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "responses", + "output_cost_per_token": 0.00018, + "output_cost_per_token_above_272k_tokens": 0.00027, + "output_cost_per_token_flex": 9e-05, + "output_cost_per_token_batches": 9e-05, + "supported_endpoints": [ + "/v1/responses", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": false, + "supports_system_messages": true, + 
"supports_tool_choice": true, + "supports_service_tier": true, + "supports_vision": true, + "supports_web_search": true, + "supports_none_reasoning_effort": false, + "supports_xhigh_reasoning_effort": true, + "supports_minimal_reasoning_effort": false, + "supports_low_reasoning_effort": false + }, + "gpt-5.4": { + "cache_read_input_token_cost": 2.5e-07, + "cache_read_input_token_cost_above_272k_tokens": 5e-07, + "cache_read_input_token_cost_flex": 1.3e-07, + "cache_read_input_token_cost_priority": 5e-07, + "input_cost_per_token": 2.5e-06, + "input_cost_per_token_above_272k_tokens": 5e-06, + "input_cost_per_token_flex": 1.25e-06, + "input_cost_per_token_batches": 1.25e-06, + "input_cost_per_token_priority": 5e-06, + "litellm_provider": "openai", + "max_input_tokens": 1050000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 1.5e-05, + "output_cost_per_token_above_272k_tokens": 2.25e-05, + "output_cost_per_token_flex": 7.5e-06, + "output_cost_per_token_batches": 7.5e-06, + "output_cost_per_token_priority": 3e-05, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_service_tier": true, + "supports_vision": true, + "supports_none_reasoning_effort": true, + "supports_xhigh_reasoning_effort": true, + "supports_minimal_reasoning_effort": true + }, + "gpt-5.4-2026-03-05": { + "cache_read_input_token_cost": 2.5e-07, + "cache_read_input_token_cost_above_272k_tokens": 5e-07, + "cache_read_input_token_cost_flex": 1.3e-07, + "cache_read_input_token_cost_priority": 
5e-07, + "input_cost_per_token": 2.5e-06, + "input_cost_per_token_above_272k_tokens": 5e-06, + "input_cost_per_token_flex": 1.25e-06, + "input_cost_per_token_batches": 1.25e-06, + "input_cost_per_token_priority": 5e-06, + "litellm_provider": "openai", + "max_input_tokens": 1050000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 1.5e-05, + "output_cost_per_token_above_272k_tokens": 2.25e-05, + "output_cost_per_token_flex": 7.5e-06, + "output_cost_per_token_batches": 7.5e-06, + "output_cost_per_token_priority": 3e-05, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_service_tier": true, + "supports_vision": true + }, + "gpt-5.4-pro": { + "cache_read_input_token_cost": 3e-06, + "cache_read_input_token_cost_above_272k_tokens": 6e-06, + "input_cost_per_token": 3e-05, + "input_cost_per_token_above_272k_tokens": 6e-05, + "input_cost_per_token_flex": 1.5e-05, + "input_cost_per_token_batches": 1.5e-05, + "litellm_provider": "openai", + "max_input_tokens": 1050000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "responses", + "output_cost_per_token": 0.00018, + "output_cost_per_token_above_272k_tokens": 0.00027, + "output_cost_per_token_flex": 9e-05, + "output_cost_per_token_batches": 9e-05, + "supported_endpoints": [ + "/v1/responses", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_function_calling": true, + "supports_native_streaming": true, + 
"supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": false, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_service_tier": true, + "supports_vision": true, + "supports_web_search": true, + "supports_none_reasoning_effort": false, + "supports_xhigh_reasoning_effort": true, + "supports_minimal_reasoning_effort": true + }, + "gpt-5.4-pro-2026-03-05": { + "cache_read_input_token_cost": 3e-06, + "cache_read_input_token_cost_above_272k_tokens": 6e-06, + "input_cost_per_token": 3e-05, + "input_cost_per_token_above_272k_tokens": 6e-05, + "input_cost_per_token_flex": 1.5e-05, + "input_cost_per_token_batches": 1.5e-05, + "litellm_provider": "openai", + "max_input_tokens": 1050000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "responses", + "output_cost_per_token": 0.00018, + "output_cost_per_token_above_272k_tokens": 0.00027, + "output_cost_per_token_flex": 9e-05, + "output_cost_per_token_batches": 9e-05, + "supported_endpoints": [ + "/v1/responses", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": false, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_service_tier": true, + "supports_vision": true, + "supports_web_search": true, + "supports_none_reasoning_effort": false, + "supports_xhigh_reasoning_effort": true, + "supports_minimal_reasoning_effort": true + }, + "gpt-5.4-mini": { + "cache_read_input_token_cost": 7.5e-08, + "cache_read_input_token_cost_flex": 3.75e-08, + "cache_read_input_token_cost_batches": 3.75e-08, + "cache_read_input_token_cost_priority": 
1.5e-07, + "input_cost_per_token": 7.5e-07, + "input_cost_per_token_flex": 3.75e-07, + "input_cost_per_token_batches": 3.75e-07, + "input_cost_per_token_priority": 1.5e-06, + "litellm_provider": "openai", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 4.5e-06, + "output_cost_per_token_flex": 2.25e-06, + "output_cost_per_token_batches": 2.25e-06, + "output_cost_per_token_priority": 9e-06, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_service_tier": true, + "supports_vision": true, + "supports_web_search": true, + "supports_none_reasoning_effort": true, + "supports_xhigh_reasoning_effort": true, + "supports_minimal_reasoning_effort": false + }, + "gpt-5.4-mini-2026-03-17": { + "cache_read_input_token_cost": 7.5e-08, + "cache_read_input_token_cost_flex": 3.75e-08, + "cache_read_input_token_cost_batches": 3.75e-08, + "cache_read_input_token_cost_priority": 1.5e-07, + "input_cost_per_token": 7.5e-07, + "input_cost_per_token_flex": 3.75e-07, + "input_cost_per_token_batches": 3.75e-07, + "input_cost_per_token_priority": 1.5e-06, + "litellm_provider": "openai", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 4.5e-06, + "output_cost_per_token_flex": 2.25e-06, + "output_cost_per_token_batches": 2.25e-06, + "output_cost_per_token_priority": 9e-06, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + 
"supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_service_tier": true, + "supports_vision": true, + "supports_web_search": true, + "supports_none_reasoning_effort": true, + "supports_xhigh_reasoning_effort": true, + "supports_minimal_reasoning_effort": false + }, + "gpt-5.4-nano": { + "cache_read_input_token_cost": 2e-08, + "cache_read_input_token_cost_flex": 1e-08, + "cache_read_input_token_cost_batches": 1e-08, + "input_cost_per_token": 2e-07, + "input_cost_per_token_flex": 1e-07, + "input_cost_per_token_batches": 1e-07, + "litellm_provider": "openai", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 1.25e-06, + "output_cost_per_token_flex": 6.25e-07, + "output_cost_per_token_batches": 6.25e-07, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_service_tier": true, + "supports_vision": true, + "supports_web_search": true, + "supports_none_reasoning_effort": true, + "supports_xhigh_reasoning_effort": true, + "supports_minimal_reasoning_effort": false + }, + "gpt-5.4-nano-2026-03-17": { + "cache_read_input_token_cost": 2e-08, + 
"cache_read_input_token_cost_flex": 1e-08, + "cache_read_input_token_cost_batches": 1e-08, + "input_cost_per_token": 2e-07, + "input_cost_per_token_flex": 1e-07, + "input_cost_per_token_batches": 1e-07, + "litellm_provider": "openai", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 1.25e-06, + "output_cost_per_token_flex": 6.25e-07, + "output_cost_per_token_batches": 6.25e-07, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_service_tier": true, + "supports_vision": true, + "supports_web_search": true, + "supports_none_reasoning_effort": true, + "supports_xhigh_reasoning_effort": true, + "supports_minimal_reasoning_effort": false + }, + "gpt-5-pro": { + "input_cost_per_token": 1.5e-05, + "input_cost_per_token_batches": 7.5e-06, + "litellm_provider": "openai", + "max_input_tokens": 128000, + "max_output_tokens": 272000, + "max_tokens": 272000, + "mode": "responses", + "output_cost_per_token": 0.00012, + "output_cost_per_token_batches": 6e-05, + "supported_endpoints": [ + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_function_calling": true, + "supports_native_streaming": false, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, 
+ "supports_vision": true, + "supports_web_search": true, + "supports_none_reasoning_effort": false, + "supports_xhigh_reasoning_effort": false, + "supports_minimal_reasoning_effort": true + }, + "gpt-5-pro-2025-10-06": { + "input_cost_per_token": 1.5e-05, + "input_cost_per_token_batches": 7.5e-06, + "litellm_provider": "openai", + "max_input_tokens": 128000, + "max_output_tokens": 272000, + "max_tokens": 272000, + "mode": "responses", + "output_cost_per_token": 0.00012, + "output_cost_per_token_batches": 6e-05, + "supported_endpoints": [ + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_function_calling": true, + "supports_native_streaming": false, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_web_search": true, + "supports_none_reasoning_effort": false, + "supports_xhigh_reasoning_effort": false, + "supports_minimal_reasoning_effort": true + }, + "gpt-5-2025-08-07": { + "cache_read_input_token_cost": 1.25e-07, + "cache_read_input_token_cost_flex": 6.25e-08, + "cache_read_input_token_cost_priority": 2.5e-07, + "input_cost_per_token": 1.25e-06, + "input_cost_per_token_flex": 6.25e-07, + "input_cost_per_token_priority": 2.5e-06, + "litellm_provider": "openai", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 1e-05, + "output_cost_per_token_flex": 5e-06, + "output_cost_per_token_priority": 2e-05, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_function_calling": true, + "supports_native_streaming": 
true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_service_tier": true, + "supports_vision": true, + "supports_web_search": true, + "supports_none_reasoning_effort": false, + "supports_xhigh_reasoning_effort": false, + "supports_minimal_reasoning_effort": true + }, + "gpt-5-chat": { + "cache_read_input_token_cost": 1.25e-07, + "input_cost_per_token": 1.25e-06, + "litellm_provider": "openai", + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 1e-05, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_function_calling": false, + "supports_native_streaming": true, + "supports_parallel_function_calling": false, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": false, + "supports_vision": true, + "supports_none_reasoning_effort": false, + "supports_xhigh_reasoning_effort": false, + "supports_minimal_reasoning_effort": true + }, + "gpt-5-chat-latest": { + "cache_read_input_token_cost": 1.25e-07, + "input_cost_per_token": 1.25e-06, + "litellm_provider": "openai", + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 1e-05, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_function_calling": false, + "supports_native_streaming": true, + "supports_parallel_function_calling": 
false, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": false, + "supports_vision": true, + "supports_none_reasoning_effort": false, + "supports_xhigh_reasoning_effort": false, + "supports_minimal_reasoning_effort": true + }, + "gpt-5-codex": { + "cache_read_input_token_cost": 1.25e-07, + "input_cost_per_token": 1.25e-06, + "litellm_provider": "openai", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "responses", + "output_cost_per_token": 1e-05, + "supported_endpoints": [ + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": false, + "supports_tool_choice": true, + "supports_vision": true, + "supports_web_search": true, + "supports_none_reasoning_effort": false, + "supports_xhigh_reasoning_effort": false, + "supports_minimal_reasoning_effort": true + }, + "gpt-5.1-codex": { + "cache_read_input_token_cost": 1.25e-07, + "cache_read_input_token_cost_priority": 2.5e-07, + "input_cost_per_token": 1.25e-06, + "input_cost_per_token_priority": 2.5e-06, + "litellm_provider": "openai", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "responses", + "output_cost_per_token": 1e-05, + "output_cost_per_token_priority": 2e-05, + "supported_endpoints": [ + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + 
"supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": false, + "supports_tool_choice": true, + "supports_vision": true, + "supports_web_search": true, + "supports_none_reasoning_effort": false, + "supports_xhigh_reasoning_effort": false, + "supports_minimal_reasoning_effort": true + }, + "gpt-5.1-codex-max": { + "cache_read_input_token_cost": 1.25e-07, + "input_cost_per_token": 1.25e-06, + "litellm_provider": "openai", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "responses", + "output_cost_per_token": 1e-05, + "supported_endpoints": [ + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": false, + "supports_tool_choice": true, + "supports_vision": true, + "supports_web_search": true, + "supports_none_reasoning_effort": false, + "supports_xhigh_reasoning_effort": true, + "supports_minimal_reasoning_effort": true + }, + "gpt-5.1-codex-mini": { + "cache_read_input_token_cost": 2.5e-08, + "cache_read_input_token_cost_priority": 4.5e-08, + "input_cost_per_token": 2.5e-07, + "input_cost_per_token_priority": 4.5e-07, + "litellm_provider": "openai", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "responses", + "output_cost_per_token": 2e-06, + "output_cost_per_token_priority": 3.6e-06, + "supported_endpoints": [ + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_function_calling": true, + "supports_native_streaming": true, + 
"supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": false, + "supports_tool_choice": true, + "supports_vision": true, + "supports_web_search": true, + "supports_none_reasoning_effort": false, + "supports_xhigh_reasoning_effort": false, + "supports_minimal_reasoning_effort": true + }, + "gpt-5.2-codex": { + "cache_read_input_token_cost": 1.75e-07, + "cache_read_input_token_cost_priority": 3.5e-07, + "input_cost_per_token": 1.75e-06, + "input_cost_per_token_priority": 3.5e-06, + "litellm_provider": "openai", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "responses", + "output_cost_per_token": 1.4e-05, + "output_cost_per_token_priority": 2.8e-05, + "supported_endpoints": [ + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": false, + "supports_tool_choice": true, + "supports_vision": true, + "supports_web_search": true, + "supports_none_reasoning_effort": false, + "supports_xhigh_reasoning_effort": true, + "supports_minimal_reasoning_effort": true + }, + "gpt-5.3-codex": { + "cache_read_input_token_cost": 1.75e-07, + "cache_read_input_token_cost_priority": 3.5e-07, + "input_cost_per_token": 1.75e-06, + "input_cost_per_token_priority": 3.5e-06, + "litellm_provider": "openai", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "responses", + "output_cost_per_token": 1.4e-05, + "output_cost_per_token_priority": 2.8e-05, + "supported_endpoints": [ + "/v1/responses" + ], + 
"supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": false, + "supports_tool_choice": true, + "supports_vision": true, + "supports_web_search": true, + "supports_none_reasoning_effort": false, + "supports_xhigh_reasoning_effort": false, + "supports_minimal_reasoning_effort": true + }, + "gpt-5-mini": { + "cache_read_input_token_cost": 2.5e-08, + "cache_read_input_token_cost_flex": 1.25e-08, + "cache_read_input_token_cost_priority": 4.5e-08, + "input_cost_per_token": 2.5e-07, + "input_cost_per_token_flex": 1.25e-07, + "input_cost_per_token_priority": 4.5e-07, + "litellm_provider": "openai", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 2e-06, + "output_cost_per_token_flex": 1e-06, + "output_cost_per_token_priority": 3.6e-06, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_service_tier": true, + "supports_vision": true, + "supports_web_search": true, + "supports_none_reasoning_effort": false, + "supports_xhigh_reasoning_effort": false, + "supports_minimal_reasoning_effort": true + }, + "gpt-5-mini-2025-08-07": { + "cache_read_input_token_cost": 2.5e-08, + "cache_read_input_token_cost_flex": 
1.25e-08, + "cache_read_input_token_cost_priority": 4.5e-08, + "input_cost_per_token": 2.5e-07, + "input_cost_per_token_flex": 1.25e-07, + "input_cost_per_token_priority": 4.5e-07, + "litellm_provider": "openai", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 2e-06, + "output_cost_per_token_flex": 1e-06, + "output_cost_per_token_priority": 3.6e-06, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_service_tier": true, + "supports_vision": true, + "supports_web_search": true, + "supports_none_reasoning_effort": false, + "supports_xhigh_reasoning_effort": false, + "supports_minimal_reasoning_effort": true + }, + "gpt-5-nano": { + "cache_read_input_token_cost": 5e-09, + "cache_read_input_token_cost_flex": 2.5e-09, + "input_cost_per_token": 5e-08, + "input_cost_per_token_flex": 2.5e-08, + "input_cost_per_token_priority": 2.5e-06, + "litellm_provider": "openai", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 4e-07, + "output_cost_per_token_flex": 2e-07, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + 
"supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_web_search": true, + "supports_none_reasoning_effort": false, + "supports_xhigh_reasoning_effort": false, + "supports_minimal_reasoning_effort": true + }, + "gpt-5-nano-2025-08-07": { + "cache_read_input_token_cost": 5e-09, + "cache_read_input_token_cost_flex": 2.5e-09, + "input_cost_per_token": 5e-08, + "input_cost_per_token_flex": 2.5e-08, + "litellm_provider": "openai", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 4e-07, + "output_cost_per_token_flex": 2e-07, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_web_search": true, + "supports_none_reasoning_effort": false, + "supports_xhigh_reasoning_effort": false, + "supports_minimal_reasoning_effort": true + }, + "gpt-image-1": { + "cache_read_input_image_token_cost": 2.5e-06, + "cache_read_input_token_cost": 1.25e-06, + "input_cost_per_image_token": 1e-05, + "input_cost_per_token": 5e-06, + "litellm_provider": "openai", + "mode": "image_generation", + "output_cost_per_image_token": 4e-05, + "supported_endpoints": [ + "/v1/images/generations", + "/v1/images/edits" + ] + }, + "gpt-image-1-mini": { + "cache_read_input_image_token_cost": 2.5e-07, + "cache_read_input_token_cost": 2e-07, + "input_cost_per_image_token": 2.5e-06, + "input_cost_per_token": 2e-06, + 
"litellm_provider": "openai", + "mode": "image_generation", + "output_cost_per_image_token": 8e-06, + "supported_endpoints": [ + "/v1/images/generations", + "/v1/images/edits" + ] + }, + "gpt-realtime": { + "cache_creation_input_audio_token_cost": 4e-07, + "cache_read_input_token_cost": 4e-07, + "input_cost_per_audio_token": 3.2e-05, + "input_cost_per_image": 5e-06, + "input_cost_per_token": 4e-06, + "litellm_provider": "openai", + "max_input_tokens": 32000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_audio_token": 6.4e-05, + "output_cost_per_token": 1.6e-05, + "supported_endpoints": [ + "/v1/realtime" + ], + "supported_modalities": [ + "text", + "image", + "audio" + ], + "supported_output_modalities": [ + "text", + "audio" + ], + "supports_audio_input": true, + "supports_audio_output": true, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "gpt-realtime-1.5": { + "cache_creation_input_audio_token_cost": 4e-07, + "cache_read_input_token_cost": 4e-07, + "input_cost_per_audio_token": 3.2e-05, + "input_cost_per_image": 5e-06, + "input_cost_per_token": 4e-06, + "litellm_provider": "openai", + "max_input_tokens": 32000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_audio_token": 6.4e-05, + "output_cost_per_token": 1.6e-05, + "supported_endpoints": [ + "/v1/realtime" + ], + "supported_modalities": [ + "text", + "image", + "audio" + ], + "supported_output_modalities": [ + "text", + "audio" + ], + "supports_audio_input": true, + "supports_audio_output": true, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "gpt-realtime-mini": { + "cache_creation_input_audio_token_cost": 3e-07, + "cache_read_input_audio_token_cost": 3e-07, + "input_cost_per_audio_token": 1e-05, + 
"input_cost_per_token": 6e-07, + "litellm_provider": "openai", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_audio_token": 2e-05, + "output_cost_per_token": 2.4e-06, + "supported_endpoints": [ + "/v1/realtime" + ], + "supported_modalities": [ + "text", + "image", + "audio" + ], + "supported_output_modalities": [ + "text", + "audio" + ], + "supports_audio_input": true, + "supports_audio_output": true, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "gpt-realtime-2025-08-28": { + "cache_creation_input_audio_token_cost": 4e-07, + "cache_read_input_token_cost": 4e-07, + "input_cost_per_audio_token": 3.2e-05, + "input_cost_per_image": 5e-06, + "input_cost_per_token": 4e-06, + "litellm_provider": "openai", + "max_input_tokens": 32000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_audio_token": 6.4e-05, + "output_cost_per_token": 1.6e-05, + "supported_endpoints": [ + "/v1/realtime" + ], + "supported_modalities": [ + "text", + "image", + "audio" + ], + "supported_output_modalities": [ + "text", + "audio" + ], + "supports_audio_input": true, + "supports_audio_output": true, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "gradient_ai/alibaba-qwen3-32b": { + "litellm_provider": "gradient_ai", + "max_tokens": 40960, + "mode": "chat", + "supported_endpoints": [ + "/v1/chat/completions" + ], + "supported_modalities": [ + "text" + ], + "supports_tool_choice": false, + "max_input_tokens": 131072, + "max_output_tokens": 40960 + }, + "gradient_ai/anthropic-claude-3-opus": { + "input_cost_per_token": 1.5e-05, + "litellm_provider": "gradient_ai", + "max_tokens": 1024, + "mode": "chat", + "output_cost_per_token": 7.5e-05, + "supported_endpoints": [ + 
"/v1/chat/completions" + ], + "supported_modalities": [ + "text" + ], + "supports_tool_choice": false, + "max_input_tokens": 200000, + "max_output_tokens": 1024 + }, + "gradient_ai/anthropic-claude-3.5-haiku": { + "input_cost_per_token": 8e-07, + "litellm_provider": "gradient_ai", + "max_tokens": 1024, + "mode": "chat", + "output_cost_per_token": 4e-06, + "supported_endpoints": [ + "/v1/chat/completions" + ], + "supported_modalities": [ + "text" + ], + "supports_tool_choice": false, + "max_input_tokens": 200000, + "max_output_tokens": 1024 + }, + "gradient_ai/anthropic-claude-3.5-sonnet": { + "input_cost_per_token": 3e-06, + "litellm_provider": "gradient_ai", + "max_tokens": 1024, + "mode": "chat", + "output_cost_per_token": 1.5e-05, + "supported_endpoints": [ + "/v1/chat/completions" + ], + "supported_modalities": [ + "text" + ], + "supports_tool_choice": false, + "max_input_tokens": 200000, + "max_output_tokens": 1024 + }, + "gradient_ai/anthropic-claude-3.7-sonnet": { + "input_cost_per_token": 3e-06, + "litellm_provider": "gradient_ai", + "max_tokens": 1024, + "mode": "chat", + "output_cost_per_token": 1.5e-05, + "supported_endpoints": [ + "/v1/chat/completions" + ], + "supported_modalities": [ + "text" + ], + "supports_tool_choice": false, + "max_input_tokens": 200000, + "max_output_tokens": 1024 + }, + "gradient_ai/deepseek-r1-distill-llama-70b": { + "input_cost_per_token": 9.9e-07, + "litellm_provider": "gradient_ai", + "max_tokens": 8000, + "mode": "chat", + "output_cost_per_token": 9.9e-07, + "supported_endpoints": [ + "/v1/chat/completions" + ], + "supported_modalities": [ + "text" + ], + "supports_tool_choice": false, + "max_input_tokens": 32768, + "max_output_tokens": 8000 + }, + "gradient_ai/llama3-8b-instruct": { + "input_cost_per_token": 2e-07, + "litellm_provider": "gradient_ai", + "max_tokens": 512, + "mode": "chat", + "output_cost_per_token": 2e-07, + "supported_endpoints": [ + "/v1/chat/completions" + ], + "supported_modalities": [ + "text" + ], + 
"supports_tool_choice": false, + "max_input_tokens": 8192, + "max_output_tokens": 512 + }, + "gradient_ai/llama3.3-70b-instruct": { + "input_cost_per_token": 6.5e-07, + "litellm_provider": "gradient_ai", + "max_tokens": 2048, + "mode": "chat", + "output_cost_per_token": 6.5e-07, + "supported_endpoints": [ + "/v1/chat/completions" + ], + "supported_modalities": [ + "text" + ], + "supports_tool_choice": false, + "max_input_tokens": 128000, + "max_output_tokens": 2048 + }, + "gradient_ai/mistral-nemo-instruct-2407": { + "input_cost_per_token": 3e-07, + "litellm_provider": "gradient_ai", + "max_tokens": 512, + "mode": "chat", + "output_cost_per_token": 3e-07, + "supported_endpoints": [ + "/v1/chat/completions" + ], + "supported_modalities": [ + "text" + ], + "supports_tool_choice": false, + "max_input_tokens": 128000, + "max_output_tokens": 512 + }, + "gradient_ai/openai-gpt-4o": { + "litellm_provider": "gradient_ai", + "max_tokens": 16384, + "mode": "chat", + "supported_endpoints": [ + "/v1/chat/completions" + ], + "supported_modalities": [ + "text" + ], + "supports_tool_choice": false, + "max_input_tokens": 128000, + "max_output_tokens": 16384 + }, + "gradient_ai/openai-gpt-4o-mini": { + "litellm_provider": "gradient_ai", + "max_tokens": 16384, + "mode": "chat", + "supported_endpoints": [ + "/v1/chat/completions" + ], + "supported_modalities": [ + "text" + ], + "supports_tool_choice": false, + "max_input_tokens": 128000, + "max_output_tokens": 16384 + }, + "gradient_ai/openai-o3": { + "input_cost_per_token": 2e-06, + "litellm_provider": "gradient_ai", + "max_tokens": 100000, + "mode": "chat", + "output_cost_per_token": 8e-06, + "supported_endpoints": [ + "/v1/chat/completions" + ], + "supported_modalities": [ + "text" + ], + "supports_tool_choice": false, + "max_input_tokens": 200000, + "max_output_tokens": 100000 + }, + "gradient_ai/openai-o3-mini": { + "input_cost_per_token": 1.1e-06, + "litellm_provider": "gradient_ai", + "max_tokens": 100000, + "mode": "chat", + 
"output_cost_per_token": 4.4e-06, + "supported_endpoints": [ + "/v1/chat/completions" + ], + "supported_modalities": [ + "text" + ], + "supports_tool_choice": false, + "max_input_tokens": 200000, + "max_output_tokens": 100000 + }, + "lemonade/Qwen3-Coder-30B-A3B-Instruct-GGUF": { + "input_cost_per_token": 0, + "litellm_provider": "lemonade", + "max_tokens": 32768, + "max_input_tokens": 262144, + "max_output_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 0, + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "lemonade/gpt-oss-20b-mxfp4-GGUF": { + "input_cost_per_token": 0, + "litellm_provider": "lemonade", + "max_tokens": 32768, + "max_input_tokens": 131072, + "max_output_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 0, + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "lemonade/gpt-oss-120b-mxfp-GGUF": { + "input_cost_per_token": 0, + "litellm_provider": "lemonade", + "max_tokens": 32768, + "max_input_tokens": 131072, + "max_output_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 0, + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "lemonade/Gemma-3-4b-it-GGUF": { + "input_cost_per_token": 0, + "litellm_provider": "lemonade", + "max_tokens": 8192, + "max_input_tokens": 128000, + "max_output_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 0, + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "lemonade/Qwen3-4B-Instruct-2507-GGUF": { + "input_cost_per_token": 0, + "litellm_provider": "lemonade", + "max_tokens": 32768, + "max_input_tokens": 262144, + "max_output_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 0, + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "amazon-nova/nova-micro-v1": { + 
"input_cost_per_token": 3.5e-08, + "litellm_provider": "amazon_nova", + "max_input_tokens": 128000, + "max_output_tokens": 10000, + "max_tokens": 10000, + "mode": "chat", + "output_cost_per_token": 1.4e-07, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_response_schema": true + }, + "amazon-nova/nova-lite-v1": { + "input_cost_per_token": 6e-08, + "litellm_provider": "amazon_nova", + "max_input_tokens": 300000, + "max_output_tokens": 10000, + "max_tokens": 10000, + "mode": "chat", + "output_cost_per_token": 2.4e-07, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_vision": true + }, + "amazon-nova/nova-premier-v1": { + "input_cost_per_token": 2.5e-06, + "litellm_provider": "amazon_nova", + "max_input_tokens": 1000000, + "max_output_tokens": 10000, + "max_tokens": 10000, + "mode": "chat", + "output_cost_per_token": 1.25e-05, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": false, + "supports_response_schema": true, + "supports_vision": true + }, + "amazon-nova/nova-pro-v1": { + "input_cost_per_token": 8e-07, + "litellm_provider": "amazon_nova", + "max_input_tokens": 300000, + "max_output_tokens": 10000, + "max_tokens": 10000, + "mode": "chat", + "output_cost_per_token": 3.2e-06, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_vision": true + }, + "groq/llama-3.1-8b-instant": { + "input_cost_per_token": 5e-08, + "litellm_provider": "groq", + "max_input_tokens": 128000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 8e-08, + "supports_function_calling": true, + "supports_response_schema": false, + "supports_tool_choice": true + }, + "groq/llama-3.3-70b-versatile": { + "input_cost_per_token": 5.9e-07, + "litellm_provider": "groq", + 
"max_input_tokens": 128000, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 7.9e-07, + "supports_function_calling": true, + "supports_response_schema": false, + "supports_tool_choice": true + }, + "groq/gemma-7b-it": { + "input_cost_per_token": 5e-08, + "litellm_provider": "groq", + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 8e-08, + "supports_function_calling": true, + "supports_response_schema": false, + "supports_tool_choice": true + }, + "groq/meta-llama/llama-guard-4-12b": { + "input_cost_per_token": 2e-07, + "litellm_provider": "groq", + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 2e-07 + }, + "groq/meta-llama/llama-4-maverick-17b-128e-instruct": { + "input_cost_per_token": 2e-07, + "litellm_provider": "groq", + "max_input_tokens": 131072, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 6e-07, + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "groq/meta-llama/llama-4-scout-17b-16e-instruct": { + "input_cost_per_token": 1.1e-07, + "litellm_provider": "groq", + "max_input_tokens": 131072, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 3.4e-07, + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "groq/moonshotai/kimi-k2-instruct-0905": { + "input_cost_per_token": 1e-06, + "output_cost_per_token": 3e-06, + "cache_read_input_token_cost": 5e-07, + "litellm_provider": "groq", + "max_input_tokens": 262144, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + 
"groq/openai/gpt-oss-120b": { + "cache_read_input_token_cost": 7.5e-08, + "input_cost_per_token": 1.5e-07, + "litellm_provider": "groq", + "max_input_tokens": 131072, + "max_output_tokens": 32766, + "max_tokens": 32766, + "mode": "chat", + "output_cost_per_token": 6e-07, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_web_search": true + }, + "groq/openai/gpt-oss-20b": { + "cache_read_input_token_cost": 3.75e-08, + "input_cost_per_token": 7.5e-08, + "litellm_provider": "groq", + "max_input_tokens": 131072, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 3e-07, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_web_search": true + }, + "groq/openai/gpt-oss-safeguard-20b": { + "cache_read_input_token_cost": 3.7e-08, + "input_cost_per_token": 7.5e-08, + "litellm_provider": "groq", + "max_input_tokens": 131072, + "max_output_tokens": 65536, + "max_tokens": 65536, + "mode": "chat", + "output_cost_per_token": 3e-07, + "supports_function_calling": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_web_search": true + }, + "groq/playai-tts": { + "input_cost_per_character": 5e-05, + "litellm_provider": "groq", + "max_input_tokens": 10000, + "max_output_tokens": 10000, + "max_tokens": 10000, + "mode": "audio_speech" + }, + "groq/qwen/qwen3-32b": { + "input_cost_per_token": 2.9e-07, + "litellm_provider": "groq", + "max_input_tokens": 131000, + "max_output_tokens": 131000, + "max_tokens": 131000, + "mode": "chat", + "output_cost_per_token": 5.9e-07, + "supports_function_calling": true, + "supports_reasoning": true, + "supports_response_schema": false, + "supports_tool_choice": true 
+ }, + "groq/whisper-large-v3": { + "input_cost_per_second": 3.083e-05, + "litellm_provider": "groq", + "mode": "audio_transcription", + "output_cost_per_second": 0.0 + }, + "groq/whisper-large-v3-turbo": { + "input_cost_per_second": 1.111e-05, + "litellm_provider": "groq", + "mode": "audio_transcription", + "output_cost_per_second": 0.0 + }, + "hd/1024-x-1024/dall-e-3": { + "input_cost_per_pixel": 7.629e-08, + "litellm_provider": "openai", + "mode": "image_generation", + "output_cost_per_pixel": 0.0 + }, + "hd/1024-x-1792/dall-e-3": { + "input_cost_per_pixel": 6.539e-08, + "litellm_provider": "openai", + "mode": "image_generation", + "output_cost_per_pixel": 0.0 + }, + "hd/1792-x-1024/dall-e-3": { + "input_cost_per_pixel": 6.539e-08, + "litellm_provider": "openai", + "mode": "image_generation", + "output_cost_per_pixel": 0.0 + }, + "heroku/claude-3-5-haiku": { + "litellm_provider": "heroku", + "max_tokens": 8192, + "mode": "chat", + "supports_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "max_input_tokens": 200000, + "max_output_tokens": 8192 + }, + "heroku/claude-3-5-sonnet-latest": { + "litellm_provider": "heroku", + "max_tokens": 8192, + "mode": "chat", + "supports_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "max_input_tokens": 200000, + "max_output_tokens": 8192 + }, + "heroku/claude-3-7-sonnet": { + "litellm_provider": "heroku", + "max_tokens": 8192, + "mode": "chat", + "supports_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "max_input_tokens": 200000, + "max_output_tokens": 8192 + }, + "heroku/claude-4-sonnet": { + "litellm_provider": "heroku", + "max_tokens": 8192, + "mode": "chat", + "supports_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "max_input_tokens": 200000, + "max_output_tokens": 8192 + }, + "high/1024-x-1024/gpt-image-1": { + "input_cost_per_image": 
0.167, + "input_cost_per_pixel": 1.59263611e-07, + "litellm_provider": "openai", + "mode": "image_generation", + "output_cost_per_pixel": 0.0, + "supported_endpoints": [ + "/v1/images/generations" + ] + }, + "high/1024-x-1536/gpt-image-1": { + "input_cost_per_image": 0.25, + "input_cost_per_pixel": 1.58945719e-07, + "litellm_provider": "openai", + "mode": "image_generation", + "output_cost_per_pixel": 0.0, + "supported_endpoints": [ + "/v1/images/generations" + ] + }, + "high/1536-x-1024/gpt-image-1": { + "input_cost_per_image": 0.25, + "input_cost_per_pixel": 1.58945719e-07, + "litellm_provider": "openai", + "mode": "image_generation", + "output_cost_per_pixel": 0.0, + "supported_endpoints": [ + "/v1/images/generations" + ] + }, + "hyperbolic/NousResearch/Hermes-3-Llama-3.1-70B": { + "input_cost_per_token": 1.2e-07, + "litellm_provider": "hyperbolic", + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 3e-07, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "hyperbolic/Qwen/QwQ-32B": { + "input_cost_per_token": 2e-07, + "litellm_provider": "hyperbolic", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 2e-07, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "hyperbolic/Qwen/Qwen2.5-72B-Instruct": { + "input_cost_per_token": 1.2e-07, + "litellm_provider": "hyperbolic", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 3e-07, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + 
"hyperbolic/Qwen/Qwen2.5-Coder-32B-Instruct": { + "input_cost_per_token": 1.2e-07, + "litellm_provider": "hyperbolic", + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 3e-07, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "hyperbolic/Qwen/Qwen3-235B-A22B": { + "input_cost_per_token": 2e-06, + "litellm_provider": "hyperbolic", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 2e-06, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "hyperbolic/deepseek-ai/DeepSeek-R1": { + "input_cost_per_token": 4e-07, + "litellm_provider": "hyperbolic", + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 4e-07, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "hyperbolic/deepseek-ai/DeepSeek-R1-0528": { + "input_cost_per_token": 2.5e-07, + "litellm_provider": "hyperbolic", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 2.5e-07, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "hyperbolic/deepseek-ai/DeepSeek-V3": { + "input_cost_per_token": 2e-07, + "litellm_provider": "hyperbolic", + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 2e-07, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_system_messages": true, + 
"supports_tool_choice": true + }, + "hyperbolic/deepseek-ai/DeepSeek-V3-0324": { + "input_cost_per_token": 4e-07, + "litellm_provider": "hyperbolic", + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 4e-07, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "hyperbolic/meta-llama/Llama-3.2-3B-Instruct": { + "input_cost_per_token": 1.2e-07, + "litellm_provider": "hyperbolic", + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 3e-07, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "hyperbolic/meta-llama/Llama-3.3-70B-Instruct": { + "input_cost_per_token": 1.2e-07, + "litellm_provider": "hyperbolic", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 3e-07, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "hyperbolic/meta-llama/Meta-Llama-3-70B-Instruct": { + "input_cost_per_token": 1.2e-07, + "litellm_provider": "hyperbolic", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 3e-07, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "hyperbolic/meta-llama/Meta-Llama-3.1-405B-Instruct": { + "input_cost_per_token": 1.2e-07, + "litellm_provider": "hyperbolic", + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 3e-07, + "supports_function_calling": true, + 
"supports_parallel_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "hyperbolic/meta-llama/Meta-Llama-3.1-70B-Instruct": { + "input_cost_per_token": 1.2e-07, + "litellm_provider": "hyperbolic", + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 3e-07, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "hyperbolic/meta-llama/Meta-Llama-3.1-8B-Instruct": { + "input_cost_per_token": 1.2e-07, + "litellm_provider": "hyperbolic", + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 3e-07, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "hyperbolic/moonshotai/Kimi-K2-Instruct": { + "input_cost_per_token": 2e-06, + "litellm_provider": "hyperbolic", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 2e-06, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "j2-light": { + "input_cost_per_token": 3e-06, + "litellm_provider": "ai21", + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "completion", + "output_cost_per_token": 3e-06 + }, + "j2-mid": { + "input_cost_per_token": 1e-05, + "litellm_provider": "ai21", + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "completion", + "output_cost_per_token": 1e-05 + }, + "j2-ultra": { + "input_cost_per_token": 1.5e-05, + "litellm_provider": "ai21", + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "completion", + 
"output_cost_per_token": 1.5e-05 + }, + "jamba-1.5": { + "input_cost_per_token": 2e-07, + "litellm_provider": "ai21", + "max_input_tokens": 256000, + "max_output_tokens": 256000, + "max_tokens": 256000, + "mode": "chat", + "output_cost_per_token": 4e-07, + "supports_tool_choice": true + }, + "jamba-1.5-large": { + "input_cost_per_token": 2e-06, + "litellm_provider": "ai21", + "max_input_tokens": 256000, + "max_output_tokens": 256000, + "max_tokens": 256000, + "mode": "chat", + "output_cost_per_token": 8e-06, + "supports_tool_choice": true + }, + "jamba-1.5-large@001": { + "input_cost_per_token": 2e-06, + "litellm_provider": "ai21", + "max_input_tokens": 256000, + "max_output_tokens": 256000, + "max_tokens": 256000, + "mode": "chat", + "output_cost_per_token": 8e-06, + "supports_tool_choice": true + }, + "jamba-1.5-mini": { + "input_cost_per_token": 2e-07, + "litellm_provider": "ai21", + "max_input_tokens": 256000, + "max_output_tokens": 256000, + "max_tokens": 256000, + "mode": "chat", + "output_cost_per_token": 4e-07, + "supports_tool_choice": true + }, + "jamba-1.5-mini@001": { + "input_cost_per_token": 2e-07, + "litellm_provider": "ai21", + "max_input_tokens": 256000, + "max_output_tokens": 256000, + "max_tokens": 256000, + "mode": "chat", + "output_cost_per_token": 4e-07, + "supports_tool_choice": true + }, + "jamba-large-1.6": { + "input_cost_per_token": 2e-06, + "litellm_provider": "ai21", + "max_input_tokens": 256000, + "max_output_tokens": 256000, + "max_tokens": 256000, + "mode": "chat", + "output_cost_per_token": 8e-06, + "supports_tool_choice": true + }, + "jamba-large-1.7": { + "input_cost_per_token": 2e-06, + "litellm_provider": "ai21", + "max_input_tokens": 256000, + "max_output_tokens": 256000, + "max_tokens": 256000, + "mode": "chat", + "output_cost_per_token": 8e-06, + "supports_tool_choice": true + }, + "jamba-mini-1.6": { + "input_cost_per_token": 2e-07, + "litellm_provider": "ai21", + "max_input_tokens": 256000, + "max_output_tokens": 256000, + 
"max_tokens": 256000, + "mode": "chat", + "output_cost_per_token": 4e-07, + "supports_tool_choice": true + }, + "jamba-mini-1.7": { + "input_cost_per_token": 2e-07, + "litellm_provider": "ai21", + "max_input_tokens": 256000, + "max_output_tokens": 256000, + "max_tokens": 256000, + "mode": "chat", + "output_cost_per_token": 4e-07, + "supports_tool_choice": true + }, + "jina-reranker-v2-base-multilingual": { + "input_cost_per_token": 1.8e-08, + "litellm_provider": "jina_ai", + "max_document_chunks_per_query": 2048, + "max_input_tokens": 1024, + "max_output_tokens": 1024, + "max_tokens": 1024, + "mode": "rerank", + "output_cost_per_token": 1.8e-08 + }, + "jp.anthropic.claude-sonnet-4-5-20250929-v1:0": { + "cache_creation_input_token_cost": 4.125e-06, + "cache_read_input_token_cost": 3.3e-07, + "input_cost_per_token": 3.3e-06, + "input_cost_per_token_above_200k_tokens": 6.6e-06, + "output_cost_per_token_above_200k_tokens": 2.475e-05, + "cache_creation_input_token_cost_above_200k_tokens": 8.25e-06, + "cache_read_input_token_cost_above_200k_tokens": 6.6e-07, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 1.65e-05, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 346, + "supports_native_structured_output": true + }, + "jp.anthropic.claude-haiku-4-5-20251001-v1:0": { + "cache_creation_input_token_cost": 1.375e-06, + "cache_read_input_token_cost": 1.1e-07, + "input_cost_per_token": 1.1e-06, + "litellm_provider": 
"bedrock_converse", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 5.5e-06, + "source": "https://aws.amazon.com/about-aws/whats-new/2025/10/claude-4-5-haiku-anthropic-amazon-bedrock", + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 346, + "supports_native_structured_output": true + }, + "crusoe/deepseek-ai/DeepSeek-R1-0528": { + "input_cost_per_token": 3e-06, + "litellm_provider": "crusoe", + "max_input_tokens": 163840, + "max_output_tokens": 163840, + "max_tokens": 163840, + "mode": "chat", + "output_cost_per_token": 7e-06, + "supports_function_calling": false, + "supports_reasoning": true, + "supports_system_messages": true, + "supports_tool_choice": false + }, + "crusoe/deepseek-ai/DeepSeek-V3-0324": { + "input_cost_per_token": 1.5e-06, + "litellm_provider": "crusoe", + "max_input_tokens": 163840, + "max_output_tokens": 163840, + "max_tokens": 163840, + "mode": "chat", + "output_cost_per_token": 1.5e-06, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "crusoe/google/gemma-3-12b-it": { + "input_cost_per_token": 1e-07, + "litellm_provider": "crusoe", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 1e-07, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "crusoe/meta-llama/Llama-3.3-70B-Instruct": { + "input_cost_per_token": 2e-07, + "litellm_provider": "crusoe", + "max_input_tokens": 
131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 2e-07, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "crusoe/moonshotai/Kimi-K2-Thinking": { + "input_cost_per_token": 2.5e-06, + "litellm_provider": "crusoe", + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "max_tokens": 262144, + "mode": "chat", + "output_cost_per_token": 2.5e-06, + "supports_function_calling": false, + "supports_reasoning": true, + "supports_system_messages": true, + "supports_tool_choice": false + }, + "crusoe/openai/gpt-oss-120b": { + "input_cost_per_token": 8e-07, + "litellm_provider": "crusoe", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 8e-07, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "crusoe/Qwen/Qwen3-235B-A22B-Instruct-2507": { + "input_cost_per_token": 3e-06, + "litellm_provider": "crusoe", + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "max_tokens": 262144, + "mode": "chat", + "output_cost_per_token": 3e-06, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "lambda_ai/deepseek-llama3.3-70b": { + "input_cost_per_token": 2e-07, + "litellm_provider": "lambda_ai", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 6e-07, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_reasoning": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "lambda_ai/deepseek-r1-0528": { + "input_cost_per_token": 2e-07, + "litellm_provider": "lambda_ai", + 
"max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 6e-07, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_reasoning": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "lambda_ai/deepseek-r1-671b": { + "input_cost_per_token": 8e-07, + "litellm_provider": "lambda_ai", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 8e-07, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_reasoning": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "lambda_ai/deepseek-v3-0324": { + "input_cost_per_token": 2e-07, + "litellm_provider": "lambda_ai", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 6e-07, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "lambda_ai/hermes3-405b": { + "input_cost_per_token": 8e-07, + "litellm_provider": "lambda_ai", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 8e-07, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "lambda_ai/hermes3-70b": { + "input_cost_per_token": 1.2e-07, + "litellm_provider": "lambda_ai", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 3e-07, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "lambda_ai/hermes3-8b": { + "input_cost_per_token": 2.5e-08, + 
"litellm_provider": "lambda_ai", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 4e-08, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "lambda_ai/lfm-40b": { + "input_cost_per_token": 1e-07, + "litellm_provider": "lambda_ai", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 2e-07, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "lambda_ai/lfm-7b": { + "input_cost_per_token": 2.5e-08, + "litellm_provider": "lambda_ai", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 4e-08, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "lambda_ai/llama-4-maverick-17b-128e-instruct-fp8": { + "input_cost_per_token": 5e-08, + "litellm_provider": "lambda_ai", + "max_input_tokens": 131072, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 1e-07, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "lambda_ai/llama-4-scout-17b-16e-instruct": { + "input_cost_per_token": 5e-08, + "litellm_provider": "lambda_ai", + "max_input_tokens": 16384, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 1e-07, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "lambda_ai/llama3.1-405b-instruct-fp8": { + "input_cost_per_token": 8e-07, + 
"litellm_provider": "lambda_ai", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 8e-07, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "lambda_ai/llama3.1-70b-instruct-fp8": { + "input_cost_per_token": 1.2e-07, + "litellm_provider": "lambda_ai", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 3e-07, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "lambda_ai/llama3.1-8b-instruct": { + "input_cost_per_token": 2.5e-08, + "litellm_provider": "lambda_ai", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 4e-08, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "lambda_ai/llama3.1-nemotron-70b-instruct-fp8": { + "input_cost_per_token": 1.2e-07, + "litellm_provider": "lambda_ai", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 3e-07, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "lambda_ai/llama3.2-11b-vision-instruct": { + "input_cost_per_token": 1.5e-08, + "litellm_provider": "lambda_ai", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 2.5e-08, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + 
"lambda_ai/llama3.2-3b-instruct": { + "input_cost_per_token": 1.5e-08, + "litellm_provider": "lambda_ai", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 2.5e-08, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "lambda_ai/llama3.3-70b-instruct-fp8": { + "input_cost_per_token": 1.2e-07, + "litellm_provider": "lambda_ai", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 3e-07, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "lambda_ai/qwen25-coder-32b-instruct": { + "input_cost_per_token": 5e-08, + "litellm_provider": "lambda_ai", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 1e-07, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "lambda_ai/qwen3-32b-fp8": { + "input_cost_per_token": 5e-08, + "litellm_provider": "lambda_ai", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 1e-07, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_reasoning": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "low/1024-x-1024/gpt-image-1": { + "input_cost_per_image": 0.011, + "input_cost_per_pixel": 1.0490417e-08, + "litellm_provider": "openai", + "mode": "image_generation", + "output_cost_per_pixel": 0.0, + "supported_endpoints": [ + "/v1/images/generations" + ] + }, + "low/1024-x-1536/gpt-image-1": { + "input_cost_per_image": 0.016, + "input_cost_per_pixel": 
1.0172526e-08, + "litellm_provider": "openai", + "mode": "image_generation", + "output_cost_per_pixel": 0.0, + "supported_endpoints": [ + "/v1/images/generations" + ] + }, + "low/1536-x-1024/gpt-image-1": { + "input_cost_per_image": 0.016, + "input_cost_per_pixel": 1.0172526e-08, + "litellm_provider": "openai", + "mode": "image_generation", + "output_cost_per_pixel": 0.0, + "supported_endpoints": [ + "/v1/images/generations" + ] + }, + "max-x-max/50-steps/stability.stable-diffusion-xl-v0": { + "litellm_provider": "bedrock", + "max_input_tokens": 77, + "max_tokens": 77, + "mode": "image_generation", + "output_cost_per_image": 0.036 + }, + "max-x-max/max-steps/stability.stable-diffusion-xl-v0": { + "litellm_provider": "bedrock", + "max_input_tokens": 77, + "max_tokens": 77, + "mode": "image_generation", + "output_cost_per_image": 0.072 + }, + "medium/1024-x-1024/gpt-image-1": { + "input_cost_per_image": 0.042, + "input_cost_per_pixel": 4.0054321e-08, + "litellm_provider": "openai", + "mode": "image_generation", + "output_cost_per_pixel": 0.0, + "supported_endpoints": [ + "/v1/images/generations" + ] + }, + "medium/1024-x-1536/gpt-image-1": { + "input_cost_per_image": 0.063, + "input_cost_per_pixel": 4.0054321e-08, + "litellm_provider": "openai", + "mode": "image_generation", + "output_cost_per_pixel": 0.0, + "supported_endpoints": [ + "/v1/images/generations" + ] + }, + "medium/1536-x-1024/gpt-image-1": { + "input_cost_per_image": 0.063, + "input_cost_per_pixel": 4.0054321e-08, + "litellm_provider": "openai", + "mode": "image_generation", + "output_cost_per_pixel": 0.0, + "supported_endpoints": [ + "/v1/images/generations" + ] + }, + "low/1024-x-1024/gpt-image-1-mini": { + "input_cost_per_image": 0.005, + "litellm_provider": "openai", + "mode": "image_generation", + "supported_endpoints": [ + "/v1/images/generations" + ] + }, + "low/1024-x-1536/gpt-image-1-mini": { + "input_cost_per_image": 0.006, + "litellm_provider": "openai", + "mode": "image_generation", + 
"supported_endpoints": [ + "/v1/images/generations" + ] + }, + "low/1536-x-1024/gpt-image-1-mini": { + "input_cost_per_image": 0.006, + "litellm_provider": "openai", + "mode": "image_generation", + "supported_endpoints": [ + "/v1/images/generations" + ] + }, + "medium/1024-x-1024/gpt-image-1-mini": { + "input_cost_per_image": 0.011, + "litellm_provider": "openai", + "mode": "image_generation", + "supported_endpoints": [ + "/v1/images/generations" + ] + }, + "medium/1024-x-1536/gpt-image-1-mini": { + "input_cost_per_image": 0.015, + "litellm_provider": "openai", + "mode": "image_generation", + "supported_endpoints": [ + "/v1/images/generations" + ] + }, + "medium/1536-x-1024/gpt-image-1-mini": { + "input_cost_per_image": 0.015, + "litellm_provider": "openai", + "mode": "image_generation", + "supported_endpoints": [ + "/v1/images/generations" + ] + }, + "medlm-large": { + "input_cost_per_character": 5e-06, + "litellm_provider": "vertex_ai-language-models", + "max_input_tokens": 8192, + "max_output_tokens": 1024, + "max_tokens": 1024, + "mode": "chat", + "output_cost_per_character": 1.5e-05, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", + "supports_tool_choice": true + }, + "medlm-medium": { + "input_cost_per_character": 5e-07, + "litellm_provider": "vertex_ai-language-models", + "max_input_tokens": 32768, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_character": 1e-06, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", + "supports_tool_choice": true + }, + "meta.llama2-13b-chat-v1": { + "input_cost_per_token": 7.5e-07, + "litellm_provider": "bedrock", + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 1e-06 + }, + "meta.llama2-70b-chat-v1": { + "input_cost_per_token": 1.95e-06, + "litellm_provider": "bedrock", + "max_input_tokens": 4096, + 
"max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 2.56e-06 + }, + "meta.llama3-1-405b-instruct-v1:0": { + "input_cost_per_token": 5.32e-06, + "litellm_provider": "bedrock", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 1.6e-05, + "supports_function_calling": true, + "supports_tool_choice": false + }, + "meta.llama3-1-70b-instruct-v1:0": { + "input_cost_per_token": 9.9e-07, + "litellm_provider": "bedrock", + "max_input_tokens": 128000, + "max_output_tokens": 2048, + "max_tokens": 2048, + "mode": "chat", + "output_cost_per_token": 9.9e-07, + "supports_function_calling": true, + "supports_tool_choice": false + }, + "meta.llama3-1-8b-instruct-v1:0": { + "input_cost_per_token": 2.2e-07, + "litellm_provider": "bedrock", + "max_input_tokens": 128000, + "max_output_tokens": 2048, + "max_tokens": 2048, + "mode": "chat", + "output_cost_per_token": 2.2e-07, + "supports_function_calling": true, + "supports_tool_choice": false + }, + "meta.llama3-2-11b-instruct-v1:0": { + "input_cost_per_token": 3.5e-07, + "litellm_provider": "bedrock", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 3.5e-07, + "supports_function_calling": true, + "supports_tool_choice": false, + "supports_vision": true + }, + "meta.llama3-2-1b-instruct-v1:0": { + "input_cost_per_token": 1e-07, + "litellm_provider": "bedrock", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 1e-07, + "supports_function_calling": true, + "supports_tool_choice": false + }, + "meta.llama3-2-3b-instruct-v1:0": { + "input_cost_per_token": 1.5e-07, + "litellm_provider": "bedrock", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 1.5e-07, + "supports_function_calling": true, + 
"supports_tool_choice": false + }, + "meta.llama3-2-90b-instruct-v1:0": { + "input_cost_per_token": 2e-06, + "litellm_provider": "bedrock", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 2e-06, + "supports_function_calling": true, + "supports_tool_choice": false, + "supports_vision": true + }, + "meta.llama3-3-70b-instruct-v1:0": { + "input_cost_per_token": 7.2e-07, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 7.2e-07, + "supports_function_calling": true, + "supports_tool_choice": false + }, + "meta.llama3-70b-instruct-v1:0": { + "input_cost_per_token": 2.65e-06, + "litellm_provider": "bedrock", + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 3.5e-06 + }, + "meta.llama3-8b-instruct-v1:0": { + "input_cost_per_token": 3e-07, + "litellm_provider": "bedrock", + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 6e-07 + }, + "meta.llama4-maverick-17b-instruct-v1:0": { + "input_cost_per_token": 2.4e-07, + "input_cost_per_token_batches": 1.2e-07, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 9.7e-07, + "output_cost_per_token_batches": 4.85e-07, + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text", + "code" + ], + "supports_function_calling": true, + "supports_tool_choice": false + }, + "meta.llama4-scout-17b-instruct-v1:0": { + "input_cost_per_token": 1.7e-07, + "input_cost_per_token_batches": 8.5e-08, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 
6.6e-07, + "output_cost_per_token_batches": 3.3e-07, + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text", + "code" + ], + "supports_function_calling": true, + "supports_tool_choice": false + }, + "meta_llama/Llama-3.3-70B-Instruct": { + "litellm_provider": "meta_llama", + "max_input_tokens": 128000, + "max_output_tokens": 4028, + "max_tokens": 4028, + "mode": "chat", + "source": "https://llama.developer.meta.com/docs/models", + "supported_modalities": [ + "text" + ], + "supported_output_modalities": [ + "text" + ], + "supports_function_calling": true, + "supports_tool_choice": true + }, + "meta_llama/Llama-3.3-8B-Instruct": { + "litellm_provider": "meta_llama", + "max_input_tokens": 128000, + "max_output_tokens": 4028, + "max_tokens": 4028, + "mode": "chat", + "source": "https://llama.developer.meta.com/docs/models", + "supported_modalities": [ + "text" + ], + "supported_output_modalities": [ + "text" + ], + "supports_function_calling": true, + "supports_tool_choice": true + }, + "meta_llama/Llama-4-Maverick-17B-128E-Instruct-FP8": { + "litellm_provider": "meta_llama", + "max_input_tokens": 1000000, + "max_output_tokens": 4028, + "max_tokens": 4028, + "mode": "chat", + "source": "https://llama.developer.meta.com/docs/models", + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_function_calling": true, + "supports_tool_choice": true + }, + "meta_llama/Llama-4-Scout-17B-16E-Instruct-FP8": { + "litellm_provider": "meta_llama", + "max_input_tokens": 10000000, + "max_output_tokens": 4028, + "max_tokens": 4028, + "mode": "chat", + "source": "https://llama.developer.meta.com/docs/models", + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_function_calling": true, + "supports_tool_choice": true + }, + "minimax.minimax-m2": { + "input_cost_per_token": 3e-07, + "litellm_provider": "bedrock_converse", + 
"max_input_tokens": 128000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 1.2e-06, + "supports_system_messages": true, + "supports_native_structured_output": true + }, + "minimax.minimax-m2.1": { + "input_cost_per_token": 3e-07, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 196000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 1.2e-06, + "supports_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "source": "https://aws.amazon.com/bedrock/pricing/" + }, + "minimax.minimax-m2.5": { + "input_cost_per_token": 3e-07, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 1000000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 1.2e-06, + "supports_function_calling": true, + "supports_reasoning": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "source": "https://aws.amazon.com/bedrock/pricing/" + }, + "minimax/speech-02-hd": { + "input_cost_per_character": 0.0001, + "litellm_provider": "minimax", + "mode": "audio_speech", + "supported_endpoints": [ + "/v1/audio/speech" + ] + }, + "minimax/speech-02-turbo": { + "input_cost_per_character": 6e-05, + "litellm_provider": "minimax", + "mode": "audio_speech", + "supported_endpoints": [ + "/v1/audio/speech" + ] + }, + "minimax/speech-2.6-hd": { + "input_cost_per_character": 0.0001, + "litellm_provider": "minimax", + "mode": "audio_speech", + "supported_endpoints": [ + "/v1/audio/speech" + ] + }, + "minimax/speech-2.6-turbo": { + "input_cost_per_character": 6e-05, + "litellm_provider": "minimax", + "mode": "audio_speech", + "supported_endpoints": [ + "/v1/audio/speech" + ] + }, + "minimax/MiniMax-M2.1": { + "input_cost_per_token": 3e-07, + "output_cost_per_token": 1.2e-06, + "cache_read_input_token_cost": 3e-08, + "cache_creation_input_token_cost": 3.75e-07, + "litellm_provider": 
"minimax", + "mode": "chat", + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_system_messages": true, + "max_input_tokens": 1000000, + "max_output_tokens": 8192 + }, + "minimax/MiniMax-M2.1-lightning": { + "input_cost_per_token": 3e-07, + "output_cost_per_token": 2.4e-06, + "cache_read_input_token_cost": 3e-08, + "cache_creation_input_token_cost": 3.75e-07, + "litellm_provider": "minimax", + "mode": "chat", + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_system_messages": true, + "max_input_tokens": 1000000, + "max_output_tokens": 8192 + }, + "minimax/MiniMax-M2.5": { + "input_cost_per_token": 3e-07, + "output_cost_per_token": 1.2e-06, + "cache_read_input_token_cost": 3e-08, + "cache_creation_input_token_cost": 3.75e-07, + "litellm_provider": "minimax", + "mode": "chat", + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_system_messages": true, + "max_input_tokens": 1000000, + "max_output_tokens": 8192 + }, + "minimax/MiniMax-M2.5-lightning": { + "input_cost_per_token": 3e-07, + "output_cost_per_token": 2.4e-06, + "cache_read_input_token_cost": 3e-08, + "cache_creation_input_token_cost": 3.75e-07, + "litellm_provider": "minimax", + "mode": "chat", + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_system_messages": true, + "max_input_tokens": 1000000, + "max_output_tokens": 8192 + }, + "minimax/MiniMax-M2": { + "input_cost_per_token": 3e-07, + "output_cost_per_token": 1.2e-06, + "cache_read_input_token_cost": 3e-08, + "cache_creation_input_token_cost": 3.75e-07, + "litellm_provider": "minimax", + "mode": "chat", + "supports_function_calling": true, + "supports_tool_choice": true, + 
"supports_prompt_caching": true, + "supports_reasoning": true, + "supports_system_messages": true, + "max_input_tokens": 200000, + "max_output_tokens": 8192 + }, + "mistral.devstral-2-123b": { + "input_cost_per_token": 4e-07, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 256000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 2e-06, + "supports_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "source": "https://aws.amazon.com/bedrock/pricing/" + }, + "mistral.magistral-small-2509": { + "input_cost_per_token": 5e-07, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 128000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 1.5e-06, + "supports_function_calling": true, + "supports_reasoning": true, + "supports_system_messages": true + }, + "mistral.ministral-3-14b-instruct": { + "input_cost_per_token": 2e-07, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 128000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 2e-07, + "supports_function_calling": true, + "supports_system_messages": true, + "supports_native_structured_output": true + }, + "mistral.ministral-3-3b-instruct": { + "input_cost_per_token": 1e-07, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 128000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 1e-07, + "supports_function_calling": true, + "supports_system_messages": true, + "supports_native_structured_output": true + }, + "mistral.ministral-3-8b-instruct": { + "input_cost_per_token": 1.5e-07, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 128000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 1.5e-07, + "supports_function_calling": true, + "supports_system_messages": true, + 
"supports_native_structured_output": true + }, + "mistral.mistral-7b-instruct-v0:2": { + "input_cost_per_token": 1.5e-07, + "litellm_provider": "bedrock", + "max_input_tokens": 32000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_token": 2e-07, + "supports_tool_choice": true + }, + "mistral.mistral-large-2402-v1:0": { + "input_cost_per_token": 8e-06, + "litellm_provider": "bedrock", + "max_input_tokens": 32000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_token": 2.4e-05, + "supports_function_calling": true + }, + "mistral.mistral-large-2407-v1:0": { + "input_cost_per_token": 3e-06, + "litellm_provider": "bedrock", + "max_input_tokens": 128000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_token": 9e-06, + "supports_function_calling": true, + "supports_tool_choice": true + }, + "mistral.mistral-large-3-675b-instruct": { + "input_cost_per_token": 5e-07, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 128000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 1.5e-06, + "supports_function_calling": true, + "supports_system_messages": true, + "supports_native_structured_output": true + }, + "mistral.mistral-small-2402-v1:0": { + "input_cost_per_token": 1e-06, + "litellm_provider": "bedrock", + "max_input_tokens": 32000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_token": 3e-06, + "supports_function_calling": true + }, + "mistral.mixtral-8x7b-instruct-v0:1": { + "input_cost_per_token": 4.5e-07, + "litellm_provider": "bedrock", + "max_input_tokens": 32000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_token": 7e-07, + "supports_tool_choice": true + }, + "mistral.voxtral-mini-3b-2507": { + "input_cost_per_token": 4e-08, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 128000, + 
"max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 4e-08, + "supports_audio_input": true, + "supports_system_messages": true, + "supports_native_structured_output": true + }, + "mistral.voxtral-small-24b-2507": { + "input_cost_per_token": 1e-07, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 128000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 3e-07, + "supports_audio_input": true, + "supports_system_messages": true, + "supports_native_structured_output": true + }, + "mistral/codestral-2405": { + "input_cost_per_token": 1e-06, + "litellm_provider": "mistral", + "max_input_tokens": 32000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_token": 3e-06, + "supports_assistant_prefill": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "mistral/codestral-2508": { + "input_cost_per_token": 3e-07, + "litellm_provider": "mistral", + "max_input_tokens": 256000, + "max_output_tokens": 256000, + "max_tokens": 256000, + "mode": "chat", + "output_cost_per_token": 9e-07, + "source": "https://mistral.ai/news/codestral-25-08", + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "mistral/codestral-latest": { + "input_cost_per_token": 1e-06, + "litellm_provider": "mistral", + "max_input_tokens": 32000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_token": 3e-06, + "supports_assistant_prefill": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "mistral/codestral-mamba-latest": { + "input_cost_per_token": 2.5e-07, + "litellm_provider": "mistral", + "max_input_tokens": 256000, + "max_output_tokens": 256000, + "max_tokens": 256000, + "mode": "chat", + "output_cost_per_token": 2.5e-07, + "source": "https://mistral.ai/technology/", + 
"supports_assistant_prefill": true, + "supports_tool_choice": true + }, + "mistral/devstral-medium-2507": { + "input_cost_per_token": 4e-07, + "litellm_provider": "mistral", + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 2e-06, + "source": "https://mistral.ai/news/devstral", + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "mistral/devstral-small-2505": { + "input_cost_per_token": 1e-07, + "litellm_provider": "mistral", + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 3e-07, + "source": "https://mistral.ai/news/devstral", + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "mistral/devstral-small-2507": { + "input_cost_per_token": 1e-07, + "litellm_provider": "mistral", + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 3e-07, + "source": "https://mistral.ai/news/devstral", + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "mistral/devstral-small-latest": { + "input_cost_per_token": 1e-07, + "litellm_provider": "mistral", + "max_input_tokens": 256000, + "max_output_tokens": 256000, + "max_tokens": 256000, + "mode": "chat", + "output_cost_per_token": 3e-07, + "source": "https://docs.mistral.ai/models/devstral-small-2-25-12", + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "mistral/labs-devstral-small-2512": { + "input_cost_per_token": 1e-07, + "litellm_provider": "mistral", + "max_input_tokens": 256000, + "max_output_tokens": 256000, + 
"max_tokens": 256000, + "mode": "chat", + "output_cost_per_token": 3e-07, + "source": "https://docs.mistral.ai/models/devstral-small-2-25-12", + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "mistral/devstral-latest": { + "input_cost_per_token": 4e-07, + "litellm_provider": "mistral", + "max_input_tokens": 256000, + "max_output_tokens": 256000, + "max_tokens": 256000, + "mode": "chat", + "output_cost_per_token": 2e-06, + "source": "https://mistral.ai/news/devstral-2-vibe-cli", + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "mistral/devstral-medium-latest": { + "input_cost_per_token": 4e-07, + "litellm_provider": "mistral", + "max_input_tokens": 256000, + "max_output_tokens": 256000, + "max_tokens": 256000, + "mode": "chat", + "output_cost_per_token": 2e-06, + "source": "https://mistral.ai/news/devstral-2-vibe-cli", + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "mistral/devstral-2512": { + "input_cost_per_token": 4e-07, + "litellm_provider": "mistral", + "max_input_tokens": 256000, + "max_output_tokens": 256000, + "max_tokens": 256000, + "mode": "chat", + "output_cost_per_token": 2e-06, + "source": "https://mistral.ai/news/devstral-2-vibe-cli", + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "mistral/magistral-medium-2506": { + "input_cost_per_token": 2e-06, + "litellm_provider": "mistral", + "max_input_tokens": 40000, + "max_output_tokens": 40000, + "max_tokens": 40000, + "mode": "chat", + "output_cost_per_token": 5e-06, + "source": "https://mistral.ai/news/magistral", + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_reasoning": 
true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "mistral/magistral-medium-2509": { + "input_cost_per_token": 2e-06, + "litellm_provider": "mistral", + "max_input_tokens": 40000, + "max_output_tokens": 40000, + "max_tokens": 40000, + "mode": "chat", + "output_cost_per_token": 5e-06, + "source": "https://mistral.ai/news/magistral", + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "mistral/magistral-medium-1-2-2509": { + "input_cost_per_token": 2e-06, + "litellm_provider": "mistral", + "max_input_tokens": 40000, + "max_output_tokens": 40000, + "max_tokens": 40000, + "mode": "chat", + "output_cost_per_token": 5e-06, + "source": "https://mistral.ai/news/magistral", + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "mistral/mistral-ocr-latest": { + "litellm_provider": "mistral", + "ocr_cost_per_page": 0.001, + "annotation_cost_per_page": 0.003, + "mode": "ocr", + "supported_endpoints": [ + "/v1/ocr" + ], + "source": "https://mistral.ai/pricing#api-pricing" + }, + "mistral/mistral-ocr-2505-completion": { + "litellm_provider": "mistral", + "ocr_cost_per_page": 0.001, + "annotation_cost_per_page": 0.003, + "mode": "ocr", + "supported_endpoints": [ + "/v1/ocr" + ], + "source": "https://mistral.ai/pricing#api-pricing" + }, + "mistral/magistral-medium-latest": { + "input_cost_per_token": 2e-06, + "litellm_provider": "mistral", + "max_input_tokens": 40000, + "max_output_tokens": 40000, + "max_tokens": 40000, + "mode": "chat", + "output_cost_per_token": 5e-06, + "source": "https://mistral.ai/news/magistral", + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + 
"mistral/magistral-small-2506": { + "input_cost_per_token": 5e-07, + "litellm_provider": "mistral", + "max_input_tokens": 40000, + "max_output_tokens": 40000, + "max_tokens": 40000, + "mode": "chat", + "output_cost_per_token": 1.5e-06, + "source": "https://mistral.ai/pricing#api-pricing", + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "mistral/magistral-small-latest": { + "input_cost_per_token": 5e-07, + "litellm_provider": "mistral", + "max_input_tokens": 40000, + "max_output_tokens": 40000, + "max_tokens": 40000, + "mode": "chat", + "output_cost_per_token": 1.5e-06, + "source": "https://mistral.ai/pricing#api-pricing", + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "mistral/magistral-small-1-2-2509": { + "input_cost_per_token": 5e-07, + "litellm_provider": "mistral", + "max_input_tokens": 40000, + "max_output_tokens": 40000, + "max_tokens": 40000, + "mode": "chat", + "output_cost_per_token": 1.5e-06, + "source": "https://mistral.ai/pricing#api-pricing", + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "mistral/mistral-embed": { + "input_cost_per_token": 1e-07, + "litellm_provider": "mistral", + "max_input_tokens": 8192, + "max_tokens": 8192, + "mode": "embedding" + }, + "mistral/codestral-embed": { + "input_cost_per_token": 1.5e-07, + "litellm_provider": "mistral", + "max_input_tokens": 8192, + "max_tokens": 8192, + "mode": "embedding" + }, + "mistral/codestral-embed-2505": { + "input_cost_per_token": 1.5e-07, + "litellm_provider": "mistral", + "max_input_tokens": 8192, + "max_tokens": 8192, + "mode": "embedding" + }, + "mistral/mistral-large-2402": { + "input_cost_per_token": 
4e-06, + "litellm_provider": "mistral", + "max_input_tokens": 32000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_token": 1.2e-05, + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "mistral/mistral-large-2407": { + "input_cost_per_token": 3e-06, + "litellm_provider": "mistral", + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 9e-06, + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "mistral/mistral-large-2411": { + "input_cost_per_token": 2e-06, + "litellm_provider": "mistral", + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 6e-06, + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "mistral/mistral-large-latest": { + "input_cost_per_token": 5e-07, + "litellm_provider": "mistral", + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "max_tokens": 262144, + "mode": "chat", + "output_cost_per_token": 1.5e-06, + "source": "https://docs.mistral.ai/models/mistral-large-3-25-12", + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "mistral/mistral-large-3": { + "input_cost_per_token": 5e-07, + "litellm_provider": "mistral", + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "max_tokens": 262144, + "mode": "chat", + "output_cost_per_token": 1.5e-06, + "source": "https://docs.mistral.ai/models/mistral-large-3-25-12", + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_response_schema": true, + 
"supports_tool_choice": true, + "supports_vision": true + }, + "mistral/mistral-large-2512": { + "input_cost_per_token": 5e-07, + "litellm_provider": "mistral", + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "max_tokens": 262144, + "mode": "chat", + "output_cost_per_token": 1.5e-06, + "source": "https://docs.mistral.ai/models/mistral-large-3-25-12", + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "mistral/mistral-medium": { + "input_cost_per_token": 2.7e-06, + "litellm_provider": "mistral", + "max_input_tokens": 32000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_token": 8.1e-06, + "supports_assistant_prefill": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "mistral/mistral-medium-2312": { + "input_cost_per_token": 2.7e-06, + "litellm_provider": "mistral", + "max_input_tokens": 32000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_token": 8.1e-06, + "supports_assistant_prefill": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "mistral/mistral-medium-2505": { + "input_cost_per_token": 4e-07, + "litellm_provider": "mistral", + "max_input_tokens": 131072, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_token": 2e-06, + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "mistral/mistral-medium-latest": { + "input_cost_per_token": 4e-07, + "litellm_provider": "mistral", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 2e-06, + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true, + 
"supports_vision": true + }, + "mistral/mistral-medium-3-1-2508": { + "input_cost_per_token": 4e-07, + "litellm_provider": "mistral", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 2e-06, + "source": "https://mistral.ai/news/mistral-medium-3", + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "mistral/mistral-small": { + "input_cost_per_token": 1e-07, + "litellm_provider": "mistral", + "max_input_tokens": 32000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_token": 3e-07, + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "mistral/mistral-small-latest": { + "input_cost_per_token": 6e-08, + "litellm_provider": "mistral", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 1.8e-07, + "source": "https://mistral.ai/pricing", + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "mistral/mistral-small-3-2-2506": { + "input_cost_per_token": 6e-08, + "litellm_provider": "mistral", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 1.8e-07, + "source": "https://mistral.ai/pricing", + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "mistral/ministral-3-3b-2512": { + "input_cost_per_token": 1e-07, + "litellm_provider": "mistral", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + 
"output_cost_per_token": 1e-07, + "source": "https://mistral.ai/pricing", + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "mistral/ministral-3-8b-2512": { + "input_cost_per_token": 1.5e-07, + "litellm_provider": "mistral", + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "max_tokens": 262144, + "mode": "chat", + "output_cost_per_token": 1.5e-07, + "source": "https://mistral.ai/pricing", + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "mistral/ministral-3-14b-2512": { + "input_cost_per_token": 2e-07, + "litellm_provider": "mistral", + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "max_tokens": 262144, + "mode": "chat", + "output_cost_per_token": 2e-07, + "source": "https://mistral.ai/pricing", + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "mistral/mistral-tiny": { + "input_cost_per_token": 2.5e-07, + "litellm_provider": "mistral", + "max_input_tokens": 32000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_token": 2.5e-07, + "supports_assistant_prefill": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "mistral/open-codestral-mamba": { + "input_cost_per_token": 2.5e-07, + "litellm_provider": "mistral", + "max_input_tokens": 256000, + "max_output_tokens": 256000, + "max_tokens": 256000, + "mode": "chat", + "output_cost_per_token": 2.5e-07, + "source": "https://mistral.ai/technology/", + "supports_assistant_prefill": true, + "supports_tool_choice": true + }, + "mistral/open-mistral-7b": { + "input_cost_per_token": 2.5e-07, + "litellm_provider": "mistral", + "max_input_tokens": 32000, + 
"max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_token": 2.5e-07, + "supports_assistant_prefill": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "mistral/open-mistral-nemo": { + "input_cost_per_token": 3e-07, + "litellm_provider": "mistral", + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 3e-07, + "source": "https://mistral.ai/technology/", + "supports_assistant_prefill": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "mistral/open-mistral-nemo-2407": { + "input_cost_per_token": 3e-07, + "litellm_provider": "mistral", + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 3e-07, + "source": "https://mistral.ai/technology/", + "supports_assistant_prefill": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "mistral/open-mixtral-8x22b": { + "input_cost_per_token": 2e-06, + "litellm_provider": "mistral", + "max_input_tokens": 65336, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_token": 6e-06, + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "mistral/open-mixtral-8x7b": { + "input_cost_per_token": 7e-07, + "litellm_provider": "mistral", + "max_input_tokens": 32000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_token": 7e-07, + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "mistral/pixtral-12b-2409": { + "input_cost_per_token": 1.5e-07, + "litellm_provider": "mistral", + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 1.5e-07, + 
"supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "mistral/pixtral-large-2411": { + "input_cost_per_token": 2e-06, + "litellm_provider": "mistral", + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 6e-06, + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "mistral/pixtral-large-latest": { + "input_cost_per_token": 2e-06, + "litellm_provider": "mistral", + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 6e-06, + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "moonshot.kimi-k2-thinking": { + "input_cost_per_token": 6e-07, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 128000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 2.5e-06, + "supports_reasoning": true, + "supports_system_messages": true, + "supports_native_structured_output": true + }, + "moonshotai.kimi-k2.5": { + "input_cost_per_token": 6e-07, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "max_tokens": 262144, + "mode": "chat", + "output_cost_per_token": 3e-06, + "supports_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "source": "https://aws.amazon.com/bedrock/pricing/" + }, + "moonshot/kimi-k2-0711-preview": { + "cache_read_input_token_cost": 1.5e-07, + "input_cost_per_token": 6e-07, + "litellm_provider": "moonshot", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + 
"max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 2.5e-06, + "source": "https://platform.moonshot.ai/docs/pricing/chat#generation-model-kimi-k2", + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_web_search": true + }, + "moonshot/kimi-k2-0905-preview": { + "cache_read_input_token_cost": 1.5e-07, + "input_cost_per_token": 6e-07, + "litellm_provider": "moonshot", + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "max_tokens": 262144, + "mode": "chat", + "output_cost_per_token": 2.5e-06, + "source": "https://platform.moonshot.ai/docs/pricing/chat#generation-model-kimi-k2", + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_web_search": true + }, + "moonshot/kimi-k2-turbo-preview": { + "cache_read_input_token_cost": 1.5e-07, + "input_cost_per_token": 1.15e-06, + "litellm_provider": "moonshot", + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "max_tokens": 262144, + "mode": "chat", + "output_cost_per_token": 8e-06, + "source": "https://platform.moonshot.ai/docs/pricing/chat#generation-model-kimi-k2", + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_web_search": true + }, + "moonshot/kimi-k2.5": { + "cache_read_input_token_cost": 1e-07, + "input_cost_per_token": 6e-07, + "litellm_provider": "moonshot", + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "max_tokens": 262144, + "mode": "chat", + "output_cost_per_token": 3e-06, + "source": "https://platform.moonshot.ai/docs/guide/kimi-k2-5-quickstart", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true, + "supports_video_input": true, + "supports_vision": true + }, + "moonshot/kimi-k2.6": { + "cache_read_input_token_cost": 1.6e-07, + "input_cost_per_token": 9.5e-07, + "litellm_provider": "moonshot", + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "max_tokens": 262144, + "mode": "chat", + "output_cost_per_token": 
4e-06, + "source": "https://platform.kimi.ai/docs/pricing/chat-k26", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true, + "supports_video_input": true, + "supports_vision": true + }, + "moonshot/kimi-latest": { + "cache_read_input_token_cost": 1.5e-07, + "input_cost_per_token": 2e-06, + "litellm_provider": "moonshot", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 5e-06, + "source": "https://platform.moonshot.ai/docs/pricing", + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "moonshot/kimi-latest-128k": { + "cache_read_input_token_cost": 1.5e-07, + "input_cost_per_token": 2e-06, + "litellm_provider": "moonshot", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 5e-06, + "source": "https://platform.moonshot.ai/docs/pricing", + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "moonshot/kimi-latest-32k": { + "cache_read_input_token_cost": 1.5e-07, + "input_cost_per_token": 1e-06, + "litellm_provider": "moonshot", + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 3e-06, + "source": "https://platform.moonshot.ai/docs/pricing", + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "moonshot/kimi-latest-8k": { + "cache_read_input_token_cost": 1.5e-07, + "input_cost_per_token": 2e-07, + "litellm_provider": "moonshot", + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 2e-06, + "source": "https://platform.moonshot.ai/docs/pricing", + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "moonshot/kimi-thinking-preview": { + 
"cache_read_input_token_cost": 1.5e-07, + "input_cost_per_token": 6e-07, + "litellm_provider": "moonshot", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 2.5e-06, + "source": "https://platform.moonshot.ai/docs/pricing/chat#generation-model-kimi-k2", + "supports_vision": true + }, + "moonshot/kimi-k2-thinking": { + "cache_read_input_token_cost": 1.5e-07, + "input_cost_per_token": 6e-07, + "litellm_provider": "moonshot", + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "max_tokens": 262144, + "mode": "chat", + "output_cost_per_token": 2.5e-06, + "source": "https://platform.moonshot.ai/docs/pricing/chat#generation-model-kimi-k2", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true, + "supports_web_search": true + }, + "moonshot/kimi-k2-thinking-turbo": { + "cache_read_input_token_cost": 1.5e-07, + "input_cost_per_token": 1.15e-06, + "litellm_provider": "moonshot", + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "max_tokens": 262144, + "mode": "chat", + "output_cost_per_token": 8e-06, + "source": "https://platform.moonshot.ai/docs/pricing/chat#generation-model-kimi-k2", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true, + "supports_web_search": true + }, + "moonshot/moonshot-v1-128k": { + "input_cost_per_token": 2e-06, + "litellm_provider": "moonshot", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 5e-06, + "source": "https://platform.moonshot.ai/docs/pricing", + "supports_function_calling": true, + "supports_tool_choice": true + }, + "moonshot/moonshot-v1-128k-0430": { + "input_cost_per_token": 2e-06, + "litellm_provider": "moonshot", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 5e-06, + "source": 
"https://platform.moonshot.ai/docs/pricing", + "supports_function_calling": true, + "supports_tool_choice": true + }, + "moonshot/moonshot-v1-128k-vision-preview": { + "input_cost_per_token": 2e-06, + "litellm_provider": "moonshot", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 5e-06, + "source": "https://platform.moonshot.ai/docs/pricing", + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "moonshot/moonshot-v1-32k": { + "input_cost_per_token": 1e-06, + "litellm_provider": "moonshot", + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 3e-06, + "source": "https://platform.moonshot.ai/docs/pricing", + "supports_function_calling": true, + "supports_tool_choice": true + }, + "moonshot/moonshot-v1-32k-0430": { + "input_cost_per_token": 1e-06, + "litellm_provider": "moonshot", + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 3e-06, + "source": "https://platform.moonshot.ai/docs/pricing", + "supports_function_calling": true, + "supports_tool_choice": true + }, + "moonshot/moonshot-v1-32k-vision-preview": { + "input_cost_per_token": 1e-06, + "litellm_provider": "moonshot", + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 3e-06, + "source": "https://platform.moonshot.ai/docs/pricing", + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "moonshot/moonshot-v1-8k": { + "input_cost_per_token": 2e-07, + "litellm_provider": "moonshot", + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 2e-06, + "source": "https://platform.moonshot.ai/docs/pricing", + "supports_function_calling": true, + 
"supports_tool_choice": true + }, + "moonshot/moonshot-v1-8k-0430": { + "input_cost_per_token": 2e-07, + "litellm_provider": "moonshot", + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 2e-06, + "source": "https://platform.moonshot.ai/docs/pricing", + "supports_function_calling": true, + "supports_tool_choice": true + }, + "moonshot/moonshot-v1-8k-vision-preview": { + "input_cost_per_token": 2e-07, + "litellm_provider": "moonshot", + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 2e-06, + "source": "https://platform.moonshot.ai/docs/pricing", + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "moonshot/moonshot-v1-auto": { + "input_cost_per_token": 2e-06, + "litellm_provider": "moonshot", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 5e-06, + "source": "https://platform.moonshot.ai/docs/pricing", + "supports_function_calling": true, + "supports_tool_choice": true + }, + "morph/morph-v3-fast": { + "input_cost_per_token": 8e-07, + "litellm_provider": "morph", + "max_input_tokens": 16000, + "max_output_tokens": 16000, + "max_tokens": 16000, + "mode": "chat", + "output_cost_per_token": 1.2e-06, + "supports_function_calling": false, + "supports_parallel_function_calling": false, + "supports_system_messages": true, + "supports_tool_choice": false, + "supports_vision": false + }, + "morph/morph-v3-large": { + "input_cost_per_token": 9e-07, + "litellm_provider": "morph", + "max_input_tokens": 16000, + "max_output_tokens": 16000, + "max_tokens": 16000, + "mode": "chat", + "output_cost_per_token": 1.9e-06, + "supports_function_calling": false, + "supports_parallel_function_calling": false, + "supports_system_messages": true, + "supports_tool_choice": false, + "supports_vision": false + }, + 
"multimodalembedding": { + "input_cost_per_character": 2e-07, + "input_cost_per_image": 0.0001, + "input_cost_per_token": 8e-07, + "input_cost_per_video_per_second": 0.0005, + "input_cost_per_video_per_second_above_15s_interval": 0.002, + "input_cost_per_video_per_second_above_8s_interval": 0.001, + "litellm_provider": "vertex_ai-embedding-models", + "max_input_tokens": 2048, + "max_tokens": 2048, + "mode": "embedding", + "output_cost_per_token": 0, + "output_vector_size": 768, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models", + "supported_endpoints": [ + "/v1/embeddings" + ], + "supported_modalities": [ + "text", + "image", + "video" + ] + }, + "multimodalembedding@001": { + "input_cost_per_character": 2e-07, + "input_cost_per_image": 0.0001, + "input_cost_per_token": 8e-07, + "input_cost_per_video_per_second": 0.0005, + "input_cost_per_video_per_second_above_15s_interval": 0.002, + "input_cost_per_video_per_second_above_8s_interval": 0.001, + "litellm_provider": "vertex_ai-embedding-models", + "max_input_tokens": 2048, + "max_tokens": 2048, + "mode": "embedding", + "output_cost_per_token": 0, + "output_vector_size": 768, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models", + "supported_endpoints": [ + "/v1/embeddings" + ], + "supported_modalities": [ + "text", + "image", + "video" + ] + }, + "nscale/Qwen/QwQ-32B": { + "input_cost_per_token": 1.8e-07, + "litellm_provider": "nscale", + "mode": "chat", + "output_cost_per_token": 2e-07, + "source": "https://docs.nscale.com/docs/inference/serverless-models/current#chat-models" + }, + "nscale/Qwen/Qwen2.5-Coder-32B-Instruct": { + "input_cost_per_token": 6e-08, + "litellm_provider": "nscale", + "mode": "chat", + "output_cost_per_token": 2e-07, + "source": "https://docs.nscale.com/docs/inference/serverless-models/current#chat-models" + }, + "nscale/Qwen/Qwen2.5-Coder-3B-Instruct": { + "input_cost_per_token": 1e-08, + "litellm_provider": "nscale", + "mode": 
"chat", + "output_cost_per_token": 3e-08, + "source": "https://docs.nscale.com/docs/inference/serverless-models/current#chat-models" + }, + "nscale/Qwen/Qwen2.5-Coder-7B-Instruct": { + "input_cost_per_token": 1e-08, + "litellm_provider": "nscale", + "mode": "chat", + "output_cost_per_token": 3e-08, + "source": "https://docs.nscale.com/docs/inference/serverless-models/current#chat-models" + }, + "nscale/black-forest-labs/FLUX.1-schnell": { + "input_cost_per_pixel": 1.3e-09, + "litellm_provider": "nscale", + "mode": "image_generation", + "output_cost_per_pixel": 0.0, + "source": "https://docs.nscale.com/docs/inference/serverless-models/current#image-models", + "supported_endpoints": [ + "/v1/images/generations" + ] + }, + "nscale/deepseek-ai/DeepSeek-R1-Distill-Llama-70B": { + "input_cost_per_token": 3.75e-07, + "litellm_provider": "nscale", + "metadata": { + "notes": "Pricing listed as $0.75/1M tokens total. Assumed 50/50 split for input/output." + }, + "mode": "chat", + "output_cost_per_token": 3.75e-07, + "source": "https://docs.nscale.com/docs/inference/serverless-models/current#chat-models" + }, + "nscale/deepseek-ai/DeepSeek-R1-Distill-Llama-8B": { + "input_cost_per_token": 2.5e-08, + "litellm_provider": "nscale", + "metadata": { + "notes": "Pricing listed as $0.05/1M tokens total. Assumed 50/50 split for input/output." + }, + "mode": "chat", + "output_cost_per_token": 2.5e-08, + "source": "https://docs.nscale.com/docs/inference/serverless-models/current#chat-models" + }, + "nscale/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B": { + "input_cost_per_token": 9e-08, + "litellm_provider": "nscale", + "metadata": { + "notes": "Pricing listed as $0.18/1M tokens total. Assumed 50/50 split for input/output." 
+ }, + "mode": "chat", + "output_cost_per_token": 9e-08, + "source": "https://docs.nscale.com/docs/inference/serverless-models/current#chat-models" + }, + "nscale/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B": { + "input_cost_per_token": 7e-08, + "litellm_provider": "nscale", + "metadata": { + "notes": "Pricing listed as $0.14/1M tokens total. Assumed 50/50 split for input/output." + }, + "mode": "chat", + "output_cost_per_token": 7e-08, + "source": "https://docs.nscale.com/docs/inference/serverless-models/current#chat-models" + }, + "nscale/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B": { + "input_cost_per_token": 1.5e-07, + "litellm_provider": "nscale", + "metadata": { + "notes": "Pricing listed as $0.30/1M tokens total. Assumed 50/50 split for input/output." + }, + "mode": "chat", + "output_cost_per_token": 1.5e-07, + "source": "https://docs.nscale.com/docs/inference/serverless-models/current#chat-models" + }, + "nscale/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B": { + "input_cost_per_token": 2e-07, + "litellm_provider": "nscale", + "metadata": { + "notes": "Pricing listed as $0.40/1M tokens total. Assumed 50/50 split for input/output." + }, + "mode": "chat", + "output_cost_per_token": 2e-07, + "source": "https://docs.nscale.com/docs/inference/serverless-models/current#chat-models" + }, + "nscale/meta-llama/Llama-3.1-8B-Instruct": { + "input_cost_per_token": 3e-08, + "litellm_provider": "nscale", + "metadata": { + "notes": "Pricing listed as $0.06/1M tokens total. Assumed 50/50 split for input/output." + }, + "mode": "chat", + "output_cost_per_token": 3e-08, + "source": "https://docs.nscale.com/docs/inference/serverless-models/current#chat-models" + }, + "nscale/meta-llama/Llama-3.3-70B-Instruct": { + "input_cost_per_token": 2e-07, + "litellm_provider": "nscale", + "metadata": { + "notes": "Pricing listed as $0.40/1M tokens total. Assumed 50/50 split for input/output." 
+ }, + "mode": "chat", + "output_cost_per_token": 2e-07, + "source": "https://docs.nscale.com/docs/inference/serverless-models/current#chat-models" + }, + "nscale/meta-llama/Llama-4-Scout-17B-16E-Instruct": { + "input_cost_per_token": 9e-08, + "litellm_provider": "nscale", + "mode": "chat", + "output_cost_per_token": 2.9e-07, + "source": "https://docs.nscale.com/docs/inference/serverless-models/current#chat-models" + }, + "nscale/mistralai/mixtral-8x22b-instruct-v0.1": { + "input_cost_per_token": 6e-07, + "litellm_provider": "nscale", + "metadata": { + "notes": "Pricing listed as $1.20/1M tokens total. Assumed 50/50 split for input/output." + }, + "mode": "chat", + "output_cost_per_token": 6e-07, + "source": "https://docs.nscale.com/docs/inference/serverless-models/current#chat-models" + }, + "nscale/stabilityai/stable-diffusion-xl-base-1.0": { + "input_cost_per_pixel": 3e-09, + "litellm_provider": "nscale", + "mode": "image_generation", + "output_cost_per_pixel": 0.0, + "source": "https://docs.nscale.com/docs/inference/serverless-models/current#image-models", + "supported_endpoints": [ + "/v1/images/generations" + ] + }, + "nebius/deepseek-ai/DeepSeek-R1": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "input_cost_per_token": 8e-07, + "output_cost_per_token": 2.4e-06, + "litellm_provider": "nebius", + "mode": "chat", + "supports_function_calling": true, + "supports_reasoning": true, + "source": "https://nebius.com/prices-ai-studio" + }, + "nebius/deepseek-ai/DeepSeek-R1-0528": { + "max_tokens": 164000, + "max_input_tokens": 164000, + "max_output_tokens": 164000, + "input_cost_per_token": 8e-07, + "output_cost_per_token": 2.4e-06, + "litellm_provider": "nebius", + "mode": "chat", + "supports_function_calling": true, + "supports_reasoning": true, + "source": "https://nebius.com/prices-ai-studio" + }, + "nebius/deepseek-ai/DeepSeek-R1-Distill-Llama-70B": { + "max_tokens": 128000, + "max_input_tokens": 128000, + 
"max_output_tokens": 128000, + "input_cost_per_token": 2.5e-07, + "output_cost_per_token": 7.5e-07, + "litellm_provider": "nebius", + "mode": "chat", + "supports_function_calling": true, + "source": "https://nebius.com/prices-ai-studio" + }, + "nebius/deepseek-ai/DeepSeek-V3": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "input_cost_per_token": 5e-07, + "output_cost_per_token": 1.5e-06, + "litellm_provider": "nebius", + "mode": "chat", + "supports_function_calling": true, + "source": "https://nebius.com/prices-ai-studio" + }, + "nebius/deepseek-ai/DeepSeek-V3-0324": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "input_cost_per_token": 5e-07, + "output_cost_per_token": 1.5e-06, + "litellm_provider": "nebius", + "mode": "chat", + "supports_function_calling": true, + "source": "https://nebius.com/prices-ai-studio" + }, + "nebius/google/gemma-3-27b-it": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "input_cost_per_token": 6e-08, + "output_cost_per_token": 2e-07, + "litellm_provider": "nebius", + "mode": "chat", + "supports_function_calling": true, + "supports_vision": true, + "source": "https://nebius.com/prices-ai-studio" + }, + "nebius/meta-llama/Llama-3.3-70B-Instruct": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "input_cost_per_token": 1.3e-07, + "output_cost_per_token": 4e-07, + "litellm_provider": "nebius", + "mode": "chat", + "supports_function_calling": true, + "source": "https://nebius.com/prices-ai-studio" + }, + "nebius/meta-llama/Llama-Guard-3-8B": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "input_cost_per_token": 2e-08, + "output_cost_per_token": 6e-08, + "litellm_provider": "nebius", + "mode": "chat", + "source": "https://nebius.com/prices-ai-studio" + }, + "nebius/meta-llama/Meta-Llama-3.1-8B-Instruct": { + "max_tokens": 128000, + 
"max_input_tokens": 128000, + "max_output_tokens": 128000, + "input_cost_per_token": 2e-08, + "output_cost_per_token": 6e-08, + "litellm_provider": "nebius", + "mode": "chat", + "supports_function_calling": true, + "source": "https://nebius.com/prices-ai-studio" + }, + "nebius/meta-llama/Meta-Llama-3.1-70B-Instruct": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "input_cost_per_token": 1.3e-07, + "output_cost_per_token": 4e-07, + "litellm_provider": "nebius", + "mode": "chat", + "supports_function_calling": true, + "source": "https://nebius.com/prices-ai-studio" + }, + "nebius/meta-llama/Meta-Llama-3.1-405B-Instruct": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "input_cost_per_token": 1e-06, + "output_cost_per_token": 3e-06, + "litellm_provider": "nebius", + "mode": "chat", + "supports_function_calling": true, + "source": "https://nebius.com/prices-ai-studio" + }, + "nebius/mistralai/Mistral-Nemo-Instruct-2407": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "input_cost_per_token": 4e-08, + "output_cost_per_token": 1.2e-07, + "litellm_provider": "nebius", + "mode": "chat", + "supports_function_calling": true, + "source": "https://nebius.com/prices-ai-studio" + }, + "nebius/NousResearch/Hermes-3-Llama-3.1-405B": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "input_cost_per_token": 1e-06, + "output_cost_per_token": 3e-06, + "litellm_provider": "nebius", + "mode": "chat", + "supports_function_calling": true, + "source": "https://nebius.com/prices-ai-studio" + }, + "nebius/nvidia/Llama-3.1-Nemotron-Ultra-253B-v1": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "input_cost_per_token": 6e-07, + "output_cost_per_token": 1.8e-06, + "litellm_provider": "nebius", + "mode": "chat", + "supports_function_calling": true, + "source": 
"https://nebius.com/prices-ai-studio" + }, + "nebius/nvidia/Llama-3.3-Nemotron-Super-49B-v1": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 1e-07, + "output_cost_per_token": 4e-07, + "litellm_provider": "nebius", + "mode": "chat", + "supports_function_calling": true, + "source": "https://nebius.com/prices-ai-studio" + }, + "nebius/Qwen/Qwen3-235B-A22B": { + "max_tokens": 262144, + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "input_cost_per_token": 2e-07, + "output_cost_per_token": 6e-07, + "litellm_provider": "nebius", + "mode": "chat", + "supports_function_calling": true, + "source": "https://nebius.com/prices-ai-studio" + }, + "nebius/Qwen/Qwen3-32B": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "input_cost_per_token": 1e-07, + "output_cost_per_token": 3e-07, + "litellm_provider": "nebius", + "mode": "chat", + "supports_function_calling": true, + "source": "https://nebius.com/prices-ai-studio" + }, + "nebius/Qwen/Qwen3-30B-A3B": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "input_cost_per_token": 1e-07, + "output_cost_per_token": 3e-07, + "litellm_provider": "nebius", + "mode": "chat", + "supports_function_calling": true, + "source": "https://nebius.com/prices-ai-studio" + }, + "nebius/Qwen/Qwen3-14B": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "input_cost_per_token": 8e-08, + "output_cost_per_token": 2.4e-07, + "litellm_provider": "nebius", + "mode": "chat", + "supports_function_calling": true, + "source": "https://nebius.com/prices-ai-studio" + }, + "nebius/Qwen/Qwen3-4B": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "input_cost_per_token": 8e-08, + "output_cost_per_token": 2.4e-07, + "litellm_provider": "nebius", + "mode": "chat", + "supports_function_calling": true, + "source": 
"https://nebius.com/prices-ai-studio" + }, + "nebius/Qwen/QwQ-32B": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "input_cost_per_token": 1.5e-07, + "output_cost_per_token": 4.5e-07, + "litellm_provider": "nebius", + "mode": "chat", + "supports_function_calling": true, + "supports_reasoning": true, + "source": "https://nebius.com/prices-ai-studio" + }, + "nebius/Qwen/Qwen2.5-72B-Instruct": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "input_cost_per_token": 1.3e-07, + "output_cost_per_token": 4e-07, + "litellm_provider": "nebius", + "mode": "chat", + "supports_function_calling": true, + "source": "https://nebius.com/prices-ai-studio" + }, + "nebius/Qwen/Qwen2.5-32B-Instruct": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "input_cost_per_token": 6e-08, + "output_cost_per_token": 2e-07, + "litellm_provider": "nebius", + "mode": "chat", + "supports_function_calling": true, + "source": "https://nebius.com/prices-ai-studio" + }, + "nebius/Qwen/Qwen2.5-Coder-7B": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "input_cost_per_token": 1e-08, + "output_cost_per_token": 3e-08, + "litellm_provider": "nebius", + "mode": "chat", + "supports_function_calling": true, + "source": "https://nebius.com/prices-ai-studio" + }, + "nebius/Qwen/Qwen2.5-VL-72B-Instruct": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 1.3e-07, + "output_cost_per_token": 4e-07, + "litellm_provider": "nebius", + "mode": "chat", + "supports_function_calling": true, + "supports_vision": true, + "source": "https://nebius.com/prices-ai-studio" + }, + "nebius/Qwen/Qwen2-VL-72B-Instruct": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 1.3e-07, + "output_cost_per_token": 4e-07, + "litellm_provider": "nebius", + "mode": 
"chat", + "supports_function_calling": true, + "supports_vision": true, + "source": "https://nebius.com/prices-ai-studio" + }, + "nebius/Qwen/Qwen2-VL-7B-Instruct": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 2e-08, + "output_cost_per_token": 6e-08, + "litellm_provider": "nebius", + "mode": "chat", + "supports_vision": true, + "source": "https://nebius.com/prices-ai-studio" + }, + "nebius/BAAI/bge-en-icl": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "input_cost_per_token": 1e-08, + "output_cost_per_token": 0.0, + "litellm_provider": "nebius", + "mode": "embedding", + "source": "https://nebius.com/prices-ai-studio" + }, + "nebius/BAAI/bge-multilingual-gemma2": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "input_cost_per_token": 1e-08, + "output_cost_per_token": 0.0, + "litellm_provider": "nebius", + "mode": "embedding", + "source": "https://nebius.com/prices-ai-studio" + }, + "nebius/intfloat/e5-mistral-7b-instruct": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "input_cost_per_token": 1e-08, + "output_cost_per_token": 0.0, + "litellm_provider": "nebius", + "mode": "embedding", + "source": "https://nebius.com/prices-ai-studio" + }, + "nvidia.nemotron-nano-12b-v2": { + "input_cost_per_token": 2e-07, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 128000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 6e-07, + "supports_system_messages": true, + "supports_vision": true + }, + "nvidia.nemotron-nano-9b-v2": { + "input_cost_per_token": 6e-08, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 128000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 2.3e-07, + "supports_system_messages": true + }, + "nvidia.nemotron-nano-3-30b": { + "input_cost_per_token": 6e-08, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 262144, + 
"max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 2.4e-07, + "supports_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "source": "https://aws.amazon.com/bedrock/pricing/", + "supports_native_structured_output": true + }, + "nvidia.nemotron-super-3-120b": { + "input_cost_per_token": 1.5e-07, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 256000, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 6.5e-07, + "source": "https://aws.amazon.com/bedrock/pricing/", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "o1": { + "cache_read_input_token_cost": 7.5e-06, + "input_cost_per_token": 1.5e-05, + "litellm_provider": "openai", + "max_input_tokens": 200000, + "max_output_tokens": 100000, + "max_tokens": 100000, + "mode": "chat", + "output_cost_per_token": 6e-05, + "supports_function_calling": true, + "supports_parallel_function_calling": false, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "o1-2024-12-17": { + "cache_read_input_token_cost": 7.5e-06, + "input_cost_per_token": 1.5e-05, + "litellm_provider": "openai", + "max_input_tokens": 200000, + "max_output_tokens": 100000, + "max_tokens": 100000, + "mode": "chat", + "output_cost_per_token": 6e-05, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "o1-pro": { + "input_cost_per_token": 0.00015, + "input_cost_per_token_batches": 7.5e-05, + 
"litellm_provider": "openai", + "max_input_tokens": 200000, + "max_output_tokens": 100000, + "max_tokens": 100000, + "mode": "responses", + "output_cost_per_token": 0.0006, + "output_cost_per_token_batches": 0.0003, + "supported_endpoints": [ + "/v1/responses", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_function_calling": true, + "supports_native_streaming": false, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "o1-pro-2025-03-19": { + "input_cost_per_token": 0.00015, + "input_cost_per_token_batches": 7.5e-05, + "litellm_provider": "openai", + "max_input_tokens": 200000, + "max_output_tokens": 100000, + "max_tokens": 100000, + "mode": "responses", + "output_cost_per_token": 0.0006, + "output_cost_per_token_batches": 0.0003, + "supported_endpoints": [ + "/v1/responses", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_function_calling": true, + "supports_native_streaming": false, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "o3": { + "cache_read_input_token_cost": 5e-07, + "cache_read_input_token_cost_flex": 2.5e-07, + "cache_read_input_token_cost_priority": 8.75e-07, + "input_cost_per_token": 2e-06, + "input_cost_per_token_flex": 1e-06, + "input_cost_per_token_priority": 3.5e-06, + "litellm_provider": "openai", + "max_input_tokens": 200000, + "max_output_tokens": 100000, + "max_tokens": 100000, + "mode": "chat", + "output_cost_per_token": 
8e-06, + "output_cost_per_token_flex": 4e-06, + "output_cost_per_token_priority": 1.4e-05, + "supported_endpoints": [ + "/v1/responses", + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_function_calling": true, + "supports_parallel_function_calling": false, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_service_tier": true, + "supports_vision": true, + "supports_web_search": true + }, + "o3-2025-04-16": { + "cache_read_input_token_cost": 5e-07, + "input_cost_per_token": 2e-06, + "litellm_provider": "openai", + "max_input_tokens": 200000, + "max_output_tokens": 100000, + "max_tokens": 100000, + "mode": "chat", + "output_cost_per_token": 8e-06, + "supported_endpoints": [ + "/v1/responses", + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_function_calling": true, + "supports_parallel_function_calling": false, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_service_tier": true, + "supports_vision": true, + "supports_web_search": true + }, + "o3-deep-research": { + "cache_read_input_token_cost": 2.5e-06, + "input_cost_per_token": 1e-05, + "input_cost_per_token_batches": 5e-06, + "litellm_provider": "openai", + "max_input_tokens": 200000, + "max_output_tokens": 100000, + "max_tokens": 100000, + "mode": "responses", + "output_cost_per_token": 4e-05, + "output_cost_per_token_batches": 2e-05, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": 
[ + "text" + ], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_web_search": true + }, + "o3-deep-research-2025-06-26": { + "cache_read_input_token_cost": 2.5e-06, + "input_cost_per_token": 1e-05, + "input_cost_per_token_batches": 5e-06, + "litellm_provider": "openai", + "max_input_tokens": 200000, + "max_output_tokens": 100000, + "max_tokens": 100000, + "mode": "responses", + "output_cost_per_token": 4e-05, + "output_cost_per_token_batches": 2e-05, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_web_search": true + }, + "o3-mini": { + "cache_read_input_token_cost": 5.5e-07, + "input_cost_per_token": 1.1e-06, + "litellm_provider": "openai", + "max_input_tokens": 200000, + "max_output_tokens": 100000, + "max_tokens": 100000, + "mode": "chat", + "output_cost_per_token": 4.4e-06, + "supports_function_calling": true, + "supports_parallel_function_calling": false, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": false + }, + "o3-mini-2025-01-31": { + "cache_read_input_token_cost": 5.5e-07, + "input_cost_per_token": 1.1e-06, + "litellm_provider": "openai", + "max_input_tokens": 200000, + "max_output_tokens": 100000, + 
"max_tokens": 100000, + "mode": "chat", + "output_cost_per_token": 4.4e-06, + "supports_function_calling": true, + "supports_parallel_function_calling": false, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": false + }, + "o3-pro": { + "input_cost_per_token": 2e-05, + "input_cost_per_token_batches": 1e-05, + "litellm_provider": "openai", + "max_input_tokens": 200000, + "max_output_tokens": 100000, + "max_tokens": 100000, + "mode": "responses", + "output_cost_per_token": 8e-05, + "output_cost_per_token_batches": 4e-05, + "supported_endpoints": [ + "/v1/responses", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_function_calling": true, + "supports_parallel_function_calling": false, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_web_search": true + }, + "o3-pro-2025-06-10": { + "input_cost_per_token": 2e-05, + "input_cost_per_token_batches": 1e-05, + "litellm_provider": "openai", + "max_input_tokens": 200000, + "max_output_tokens": 100000, + "max_tokens": 100000, + "mode": "responses", + "output_cost_per_token": 8e-05, + "output_cost_per_token_batches": 4e-05, + "supported_endpoints": [ + "/v1/responses", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_function_calling": true, + "supports_parallel_function_calling": false, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_web_search": true + }, + "o4-mini": { + "cache_read_input_token_cost": 2.75e-07, + "cache_read_input_token_cost_flex": 
1.375e-07, + "cache_read_input_token_cost_priority": 5e-07, + "input_cost_per_token": 1.1e-06, + "input_cost_per_token_flex": 5.5e-07, + "input_cost_per_token_priority": 2e-06, + "litellm_provider": "openai", + "max_input_tokens": 200000, + "max_output_tokens": 100000, + "max_tokens": 100000, + "mode": "chat", + "output_cost_per_token": 4.4e-06, + "output_cost_per_token_flex": 2.2e-06, + "output_cost_per_token_priority": 8e-06, + "supports_function_calling": true, + "supports_parallel_function_calling": false, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_service_tier": true, + "supports_vision": true, + "supports_web_search": true + }, + "o4-mini-2025-04-16": { + "cache_read_input_token_cost": 2.75e-07, + "input_cost_per_token": 1.1e-06, + "litellm_provider": "openai", + "max_input_tokens": 200000, + "max_output_tokens": 100000, + "max_tokens": 100000, + "mode": "chat", + "output_cost_per_token": 4.4e-06, + "supports_function_calling": true, + "supports_parallel_function_calling": false, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_service_tier": true, + "supports_vision": true, + "supports_web_search": true + }, + "o4-mini-deep-research": { + "cache_read_input_token_cost": 5e-07, + "input_cost_per_token": 2e-06, + "input_cost_per_token_batches": 1e-06, + "litellm_provider": "openai", + "max_input_tokens": 200000, + "max_output_tokens": 100000, + "max_tokens": 100000, + "mode": "responses", + "output_cost_per_token": 8e-06, + "output_cost_per_token_batches": 4e-06, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_function_calling": true, + 
"supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_web_search": true + }, + "o4-mini-deep-research-2025-06-26": { + "cache_read_input_token_cost": 5e-07, + "input_cost_per_token": 2e-06, + "input_cost_per_token_batches": 1e-06, + "litellm_provider": "openai", + "max_input_tokens": 200000, + "max_output_tokens": 100000, + "max_tokens": 100000, + "mode": "responses", + "output_cost_per_token": 8e-06, + "output_cost_per_token_batches": 4e-06, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_web_search": true + }, + "oci/meta.llama-3.1-405b-instruct": { + "input_cost_per_token": 1.068e-05, + "litellm_provider": "oci", + "max_input_tokens": 128000, + "max_output_tokens": 4000, + "max_tokens": 4000, + "mode": "chat", + "output_cost_per_token": 1.068e-05, + "source": "https://www.oracle.com/artificial-intelligence/generative-ai/generative-ai-service/pricing", + "supports_function_calling": true, + "supports_response_schema": false + }, + "oci/meta.llama-3.2-90b-vision-instruct": { + "input_cost_per_token": 2e-06, + "litellm_provider": "oci", + "max_input_tokens": 128000, + "max_output_tokens": 4000, + "max_tokens": 4000, + "mode": "chat", + "output_cost_per_token": 2e-06, + "source": "https://www.oracle.com/artificial-intelligence/generative-ai/generative-ai-service/pricing", 
+ "supports_function_calling": true, + "supports_response_schema": false, + "supports_vision": true + }, + "oci/meta.llama-3.3-70b-instruct": { + "input_cost_per_token": 7.2e-07, + "litellm_provider": "oci", + "max_input_tokens": 128000, + "max_output_tokens": 4000, + "max_tokens": 4000, + "mode": "chat", + "output_cost_per_token": 7.2e-07, + "source": "https://www.oracle.com/artificial-intelligence/generative-ai/generative-ai-service/pricing", + "supports_function_calling": true, + "supports_response_schema": false + }, + "oci/meta.llama-4-maverick-17b-128e-instruct-fp8": { + "input_cost_per_token": 7.2e-07, + "litellm_provider": "oci", + "max_input_tokens": 512000, + "max_output_tokens": 4000, + "max_tokens": 4000, + "mode": "chat", + "output_cost_per_token": 7.2e-07, + "source": "https://www.oracle.com/artificial-intelligence/generative-ai/generative-ai-service/pricing", + "supports_function_calling": true, + "supports_response_schema": false + }, + "oci/meta.llama-4-scout-17b-16e-instruct": { + "input_cost_per_token": 7.2e-07, + "litellm_provider": "oci", + "max_input_tokens": 192000, + "max_output_tokens": 4000, + "max_tokens": 4000, + "mode": "chat", + "output_cost_per_token": 7.2e-07, + "source": "https://www.oracle.com/artificial-intelligence/generative-ai/generative-ai-service/pricing", + "supports_function_calling": true, + "supports_response_schema": false + }, + "oci/xai.grok-3": { + "input_cost_per_token": 3e-06, + "litellm_provider": "oci", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 1.5e-05, + "source": "https://www.oracle.com/artificial-intelligence/generative-ai/generative-ai-service/pricing", + "supports_function_calling": true, + "supports_response_schema": false + }, + "oci/xai.grok-3-fast": { + "input_cost_per_token": 5e-06, + "litellm_provider": "oci", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + 
"output_cost_per_token": 2.5e-05, + "source": "https://www.oracle.com/artificial-intelligence/generative-ai/generative-ai-service/pricing", + "supports_function_calling": true, + "supports_response_schema": false + }, + "oci/xai.grok-3-mini": { + "input_cost_per_token": 3e-07, + "litellm_provider": "oci", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 5e-07, + "source": "https://www.oracle.com/artificial-intelligence/generative-ai/generative-ai-service/pricing", + "supports_function_calling": true, + "supports_response_schema": false + }, + "oci/xai.grok-3-mini-fast": { + "input_cost_per_token": 6e-07, + "litellm_provider": "oci", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 4e-06, + "source": "https://www.oracle.com/artificial-intelligence/generative-ai/generative-ai-service/pricing", + "supports_function_calling": true, + "supports_response_schema": false + }, + "oci/xai.grok-4": { + "input_cost_per_token": 3e-06, + "litellm_provider": "oci", + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 1.5e-05, + "source": "https://www.oracle.com/artificial-intelligence/generative-ai/generative-ai-service/pricing", + "supports_function_calling": true, + "supports_response_schema": false + }, + "oci/cohere.command-latest": { + "input_cost_per_token": 1.56e-06, + "litellm_provider": "oci", + "max_input_tokens": 128000, + "max_output_tokens": 4000, + "max_tokens": 4000, + "mode": "chat", + "output_cost_per_token": 1.56e-06, + "source": "https://www.oracle.com/cloud/ai/generative-ai/pricing/", + "supports_function_calling": true, + "supports_response_schema": false + }, + "oci/cohere.command-a-03-2025": { + "input_cost_per_token": 1.56e-06, + "litellm_provider": "oci", + "max_input_tokens": 256000, + "max_output_tokens": 4000, + 
"max_tokens": 4000, + "mode": "chat", + "output_cost_per_token": 1.56e-06, + "source": "https://www.oracle.com/cloud/ai/generative-ai/pricing/", + "supports_function_calling": true, + "supports_response_schema": false + }, + "oci/cohere.command-plus-latest": { + "input_cost_per_token": 1.56e-06, + "litellm_provider": "oci", + "max_input_tokens": 128000, + "max_output_tokens": 4000, + "max_tokens": 4000, + "mode": "chat", + "output_cost_per_token": 1.56e-06, + "source": "https://www.oracle.com/cloud/ai/generative-ai/pricing/", + "supports_function_calling": true, + "supports_response_schema": false + }, + "oci/cohere.command-a-reasoning-08-2025": { + "input_cost_per_token": 1.56e-06, + "litellm_provider": "oci", + "max_input_tokens": 256000, + "max_output_tokens": 4000, + "max_tokens": 4000, + "mode": "chat", + "output_cost_per_token": 1.56e-06, + "source": "https://www.oracle.com/artificial-intelligence/generative-ai/generative-ai-service/pricing", + "supports_function_calling": true, + "supports_response_schema": false + }, + "oci/cohere.command-a-vision-07-2025": { + "input_cost_per_token": 1.56e-06, + "litellm_provider": "oci", + "max_input_tokens": 128000, + "max_output_tokens": 4000, + "max_tokens": 4000, + "mode": "chat", + "output_cost_per_token": 1.56e-06, + "source": "https://www.oracle.com/artificial-intelligence/generative-ai/generative-ai-service/pricing", + "supports_function_calling": true, + "supports_response_schema": false, + "supports_vision": true + }, + "oci/cohere.command-a-translate-08-2025": { + "input_cost_per_token": 9e-08, + "litellm_provider": "oci", + "max_input_tokens": 256000, + "max_output_tokens": 4000, + "max_tokens": 4000, + "mode": "chat", + "output_cost_per_token": 9e-08, + "source": "https://www.oracle.com/artificial-intelligence/generative-ai/generative-ai-service/pricing", + "supports_function_calling": false, + "supports_response_schema": false + }, + "oci/cohere.command-r-08-2024": { + "input_cost_per_token": 1.5e-07, + 
"litellm_provider": "oci", + "max_input_tokens": 128000, + "max_output_tokens": 4000, + "max_tokens": 4000, + "mode": "chat", + "output_cost_per_token": 1.5e-07, + "source": "https://www.oracle.com/artificial-intelligence/generative-ai/generative-ai-service/pricing", + "supports_function_calling": true, + "supports_response_schema": false + }, + "oci/cohere.command-r-plus-08-2024": { + "input_cost_per_token": 1.56e-06, + "litellm_provider": "oci", + "max_input_tokens": 128000, + "max_output_tokens": 4000, + "max_tokens": 4000, + "mode": "chat", + "output_cost_per_token": 1.56e-06, + "source": "https://www.oracle.com/artificial-intelligence/generative-ai/generative-ai-service/pricing", + "supports_function_calling": true, + "supports_response_schema": false + }, + "oci/meta.llama-3.2-11b-vision-instruct": { + "input_cost_per_token": 2e-06, + "litellm_provider": "oci", + "max_input_tokens": 128000, + "max_output_tokens": 4000, + "max_tokens": 4000, + "mode": "chat", + "output_cost_per_token": 2e-06, + "source": "https://www.oracle.com/artificial-intelligence/generative-ai/generative-ai-service/pricing", + "supports_function_calling": true, + "supports_response_schema": false, + "supports_vision": true + }, + "oci/meta.llama-3.1-70b-instruct": { + "input_cost_per_token": 7.2e-07, + "litellm_provider": "oci", + "max_input_tokens": 128000, + "max_output_tokens": 4000, + "max_tokens": 4000, + "mode": "chat", + "output_cost_per_token": 7.2e-07, + "source": "https://www.oracle.com/artificial-intelligence/generative-ai/generative-ai-service/pricing", + "supports_function_calling": true, + "supports_response_schema": false + }, + "oci/meta.llama-3.3-70b-instruct-fp8-dynamic": { + "input_cost_per_token": 7.2e-07, + "litellm_provider": "oci", + "max_input_tokens": 128000, + "max_output_tokens": 4000, + "max_tokens": 4000, + "mode": "chat", + "output_cost_per_token": 7.2e-07, + "source": 
"https://www.oracle.com/artificial-intelligence/generative-ai/generative-ai-service/pricing", + "supports_function_calling": true, + "supports_response_schema": false + }, + "oci/xai.grok-4-fast": { + "input_cost_per_token": 5e-06, + "litellm_provider": "oci", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 2.5e-05, + "source": "https://www.oracle.com/artificial-intelligence/generative-ai/generative-ai-service/pricing", + "supports_function_calling": true, + "supports_response_schema": false + }, + "oci/xai.grok-4.1-fast": { + "input_cost_per_token": 5e-06, + "litellm_provider": "oci", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 2.5e-05, + "source": "https://www.oracle.com/artificial-intelligence/generative-ai/generative-ai-service/pricing", + "supports_function_calling": true, + "supports_response_schema": false + }, + "oci/xai.grok-4.20": { + "input_cost_per_token": 3e-06, + "litellm_provider": "oci", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 1.5e-05, + "source": "https://www.oracle.com/artificial-intelligence/generative-ai/generative-ai-service/pricing", + "supports_function_calling": true, + "supports_response_schema": false + }, + "oci/xai.grok-4.20-multi-agent": { + "input_cost_per_token": 3e-06, + "litellm_provider": "oci", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 1.5e-05, + "source": "https://www.oracle.com/artificial-intelligence/generative-ai/generative-ai-service/pricing", + "supports_function_calling": true, + "supports_response_schema": false + }, + "oci/xai.grok-code-fast-1": { + "input_cost_per_token": 5e-06, + "litellm_provider": "oci", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + 
"max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 2.5e-05, + "source": "https://www.oracle.com/artificial-intelligence/generative-ai/generative-ai-service/pricing", + "supports_function_calling": true, + "supports_response_schema": false + }, + "oci/google.gemini-2.5-pro": { + "input_cost_per_token": 1.25e-06, + "litellm_provider": "oci", + "max_input_tokens": 1048576, + "max_output_tokens": 65536, + "max_tokens": 65536, + "mode": "chat", + "output_cost_per_token": 1e-05, + "source": "https://www.oracle.com/artificial-intelligence/generative-ai/generative-ai-service/pricing", + "supports_function_calling": true, + "supports_response_schema": true, + "supports_vision": true + }, + "oci/google.gemini-2.5-flash": { + "input_cost_per_token": 1.5e-07, + "litellm_provider": "oci", + "max_input_tokens": 1048576, + "max_output_tokens": 65536, + "max_tokens": 65536, + "mode": "chat", + "output_cost_per_token": 6e-07, + "source": "https://www.oracle.com/artificial-intelligence/generative-ai/generative-ai-service/pricing", + "supports_function_calling": true, + "supports_response_schema": true, + "supports_vision": true + }, + "oci/google.gemini-2.5-flash-lite": { + "input_cost_per_token": 7.5e-08, + "litellm_provider": "oci", + "max_input_tokens": 1048576, + "max_output_tokens": 65536, + "max_tokens": 65536, + "mode": "chat", + "output_cost_per_token": 3e-07, + "source": "https://www.oracle.com/artificial-intelligence/generative-ai/generative-ai-service/pricing", + "supports_function_calling": true, + "supports_response_schema": true, + "supports_vision": true + }, + "oci/cohere.embed-english-v3.0": { + "input_cost_per_token": 1e-07, + "litellm_provider": "oci", + "max_input_tokens": 512, + "max_tokens": 512, + "mode": "embedding", + "output_cost_per_token": 0.0, + "output_vector_size": 1024, + "source": "https://www.oracle.com/artificial-intelligence/generative-ai/generative-ai-service/pricing" + }, + "oci/cohere.embed-english-light-v3.0": { + 
"input_cost_per_token": 1e-07, + "litellm_provider": "oci", + "max_input_tokens": 512, + "max_tokens": 512, + "mode": "embedding", + "output_cost_per_token": 0.0, + "output_vector_size": 384, + "source": "https://www.oracle.com/artificial-intelligence/generative-ai/generative-ai-service/pricing" + }, + "oci/cohere.embed-multilingual-v3.0": { + "input_cost_per_token": 1e-07, + "litellm_provider": "oci", + "max_input_tokens": 512, + "max_tokens": 512, + "mode": "embedding", + "output_cost_per_token": 0.0, + "output_vector_size": 1024, + "source": "https://www.oracle.com/artificial-intelligence/generative-ai/generative-ai-service/pricing" + }, + "oci/cohere.embed-multilingual-light-v3.0": { + "input_cost_per_token": 1e-07, + "litellm_provider": "oci", + "max_input_tokens": 512, + "max_tokens": 512, + "mode": "embedding", + "output_cost_per_token": 0.0, + "output_vector_size": 384, + "source": "https://www.oracle.com/artificial-intelligence/generative-ai/generative-ai-service/pricing" + }, + "oci/cohere.embed-english-image-v3.0": { + "input_cost_per_token": 1e-07, + "litellm_provider": "oci", + "max_input_tokens": 512, + "max_tokens": 512, + "mode": "embedding", + "output_cost_per_token": 0.0, + "output_vector_size": 1024, + "source": "https://www.oracle.com/artificial-intelligence/generative-ai/generative-ai-service/pricing", + "supports_embedding_image_input": true + }, + "oci/cohere.embed-english-light-image-v3.0": { + "input_cost_per_token": 1e-07, + "litellm_provider": "oci", + "max_input_tokens": 512, + "max_tokens": 512, + "mode": "embedding", + "output_cost_per_token": 0.0, + "output_vector_size": 384, + "source": "https://www.oracle.com/artificial-intelligence/generative-ai/generative-ai-service/pricing", + "supports_embedding_image_input": true + }, + "oci/cohere.embed-multilingual-light-image-v3.0": { + "input_cost_per_token": 1e-07, + "litellm_provider": "oci", + "max_input_tokens": 512, + "max_tokens": 512, + "mode": "embedding", + "output_cost_per_token": 
0.0, + "output_vector_size": 384, + "source": "https://www.oracle.com/artificial-intelligence/generative-ai/generative-ai-service/pricing", + "supports_embedding_image_input": true + }, + "oci/cohere.embed-v4.0": { + "input_cost_per_token": 1.2e-07, + "litellm_provider": "oci", + "max_input_tokens": 128000, + "max_tokens": 128000, + "mode": "embedding", + "output_cost_per_token": 0.0, + "output_vector_size": 1536, + "source": "https://www.oracle.com/artificial-intelligence/generative-ai/generative-ai-service/pricing", + "supports_embedding_image_input": true + }, + "ollama/codegeex4": { + "input_cost_per_token": 0.0, + "litellm_provider": "ollama", + "max_input_tokens": 32768, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 0.0, + "supports_function_calling": false + }, + "ollama/codegemma": { + "input_cost_per_token": 0.0, + "litellm_provider": "ollama", + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "completion", + "output_cost_per_token": 0.0 + }, + "ollama/codellama": { + "input_cost_per_token": 0.0, + "litellm_provider": "ollama", + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "completion", + "output_cost_per_token": 0.0 + }, + "ollama/deepseek-coder-v2-base": { + "input_cost_per_token": 0.0, + "litellm_provider": "ollama", + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "completion", + "output_cost_per_token": 0.0, + "supports_function_calling": true + }, + "ollama/deepseek-coder-v2-instruct": { + "input_cost_per_token": 0.0, + "litellm_provider": "ollama", + "max_input_tokens": 32768, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 0.0, + "supports_function_calling": true + }, + "ollama/deepseek-coder-v2-lite-base": { + "input_cost_per_token": 0.0, + "litellm_provider": "ollama", + "max_input_tokens": 8192, + "max_output_tokens": 8192, + 
"max_tokens": 8192, + "mode": "completion", + "output_cost_per_token": 0.0, + "supports_function_calling": true + }, + "ollama/deepseek-coder-v2-lite-instruct": { + "input_cost_per_token": 0.0, + "litellm_provider": "ollama", + "max_input_tokens": 32768, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 0.0, + "supports_function_calling": true + }, + "ollama/deepseek-v3.1:671b-cloud": { + "input_cost_per_token": 0.0, + "litellm_provider": "ollama", + "max_input_tokens": 163840, + "max_output_tokens": 163840, + "max_tokens": 163840, + "mode": "chat", + "output_cost_per_token": 0.0, + "supports_function_calling": true + }, + "ollama/gpt-oss:120b-cloud": { + "input_cost_per_token": 0.0, + "litellm_provider": "ollama", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 0.0, + "supports_function_calling": true + }, + "ollama/gpt-oss:20b-cloud": { + "input_cost_per_token": 0.0, + "litellm_provider": "ollama", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 0.0, + "supports_function_calling": true + }, + "ollama/internlm2_5-20b-chat": { + "input_cost_per_token": 0.0, + "litellm_provider": "ollama", + "max_input_tokens": 32768, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 0.0, + "supports_function_calling": true + }, + "ollama/llama2": { + "input_cost_per_token": 0.0, + "litellm_provider": "ollama", + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 0.0 + }, + "ollama/llama2-uncensored": { + "input_cost_per_token": 0.0, + "litellm_provider": "ollama", + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "completion", + "output_cost_per_token": 0.0 + }, + "ollama/llama2:13b": { + "input_cost_per_token": 0.0, + 
"litellm_provider": "ollama", + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 0.0 + }, + "ollama/llama2:70b": { + "input_cost_per_token": 0.0, + "litellm_provider": "ollama", + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 0.0 + }, + "ollama/llama2:7b": { + "input_cost_per_token": 0.0, + "litellm_provider": "ollama", + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 0.0 + }, + "ollama/llama3": { + "input_cost_per_token": 0.0, + "litellm_provider": "ollama", + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 0.0 + }, + "ollama/llama3.1": { + "input_cost_per_token": 0.0, + "litellm_provider": "ollama", + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 0.0, + "supports_function_calling": true + }, + "ollama/llama3:70b": { + "input_cost_per_token": 0.0, + "litellm_provider": "ollama", + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 0.0 + }, + "ollama/llama3:8b": { + "input_cost_per_token": 0.0, + "litellm_provider": "ollama", + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 0.0 + }, + "ollama/mistral": { + "input_cost_per_token": 0.0, + "litellm_provider": "ollama", + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "completion", + "output_cost_per_token": 0.0, + "supports_function_calling": true + }, + "ollama/mistral-7B-Instruct-v0.1": { + "input_cost_per_token": 0.0, + "litellm_provider": "ollama", + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + 
"output_cost_per_token": 0.0, + "supports_function_calling": true + }, + "ollama/mistral-7B-Instruct-v0.2": { + "input_cost_per_token": 0.0, + "litellm_provider": "ollama", + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 0.0, + "supports_function_calling": true + }, + "ollama/mistral-large-instruct-2407": { + "input_cost_per_token": 0.0, + "litellm_provider": "ollama", + "max_input_tokens": 65536, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 0.0, + "supports_function_calling": true + }, + "ollama/mixtral-8x22B-Instruct-v0.1": { + "input_cost_per_token": 0.0, + "litellm_provider": "ollama", + "max_input_tokens": 65536, + "max_output_tokens": 65536, + "max_tokens": 65536, + "mode": "chat", + "output_cost_per_token": 0.0, + "supports_function_calling": true + }, + "ollama/mixtral-8x7B-Instruct-v0.1": { + "input_cost_per_token": 0.0, + "litellm_provider": "ollama", + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 0.0, + "supports_function_calling": true + }, + "ollama/orca-mini": { + "input_cost_per_token": 0.0, + "litellm_provider": "ollama", + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "completion", + "output_cost_per_token": 0.0 + }, + "ollama/qwen3-coder:480b-cloud": { + "input_cost_per_token": 0.0, + "litellm_provider": "ollama", + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "max_tokens": 262144, + "mode": "chat", + "output_cost_per_token": 0.0, + "supports_function_calling": true + }, + "ollama/vicuna": { + "input_cost_per_token": 0.0, + "litellm_provider": "ollama", + "max_input_tokens": 2048, + "max_output_tokens": 2048, + "max_tokens": 2048, + "mode": "completion", + "output_cost_per_token": 0.0 + }, + "omni-moderation-2024-09-26": { + "input_cost_per_token": 0.0, + "litellm_provider": "openai", 
+ "max_input_tokens": 32768, + "max_output_tokens": 0, + "max_tokens": 0, + "mode": "moderation", + "output_cost_per_token": 0.0 + }, + "omni-moderation-latest": { + "input_cost_per_token": 0.0, + "litellm_provider": "openai", + "max_input_tokens": 32768, + "max_output_tokens": 0, + "max_tokens": 0, + "mode": "moderation", + "output_cost_per_token": 0.0 + }, + "openai.gpt-oss-120b-1:0": { + "input_cost_per_token": 1.5e-07, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 6e-07, + "supports_function_calling": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "openai.gpt-oss-20b-1:0": { + "input_cost_per_token": 7e-08, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 3e-07, + "supports_function_calling": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "openai.gpt-oss-safeguard-120b": { + "input_cost_per_token": 1.5e-07, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 128000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 6e-07, + "supports_system_messages": true + }, + "openai.gpt-oss-safeguard-20b": { + "input_cost_per_token": 7e-08, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 128000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 2e-07, + "supports_system_messages": true + }, + "openrouter/anthropic/claude-3-haiku": { + "input_cost_per_image": 0.0004, + "input_cost_per_token": 2.5e-07, + "litellm_provider": "openrouter", + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 1.25e-06, + "supports_function_calling": true, + "supports_tool_choice": true, + 
"supports_vision": true, + "max_input_tokens": 200000, + "max_output_tokens": 4096 + }, + "openrouter/anthropic/claude-3.5-sonnet": { + "input_cost_per_token": 3e-06, + "litellm_provider": "openrouter", + "max_input_tokens": 200000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 1.5e-05, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 159 + }, + "openrouter/anthropic/claude-3.7-sonnet": { + "input_cost_per_image": 0.0048, + "input_cost_per_token": 3e-06, + "litellm_provider": "openrouter", + "max_input_tokens": 200000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 1.5e-05, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 159 + }, + "openrouter/anthropic/claude-opus-4": { + "input_cost_per_image": 0.0048, + "cache_creation_input_token_cost": 1.875e-05, + "cache_read_input_token_cost": 1.5e-06, + "input_cost_per_token": 1.5e-05, + "litellm_provider": "openrouter", + "max_input_tokens": 200000, + "max_output_tokens": 32000, + "max_tokens": 32000, + "mode": "chat", + "output_cost_per_token": 7.5e-05, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 159 + }, + "openrouter/anthropic/claude-opus-4.1": { + "input_cost_per_image": 0.0048, + "cache_creation_input_token_cost": 1.875e-05, + "cache_creation_input_token_cost_above_1hr": 3e-05, + "cache_read_input_token_cost": 1.5e-06, + "input_cost_per_token": 1.5e-05, + 
"litellm_provider": "openrouter", + "max_input_tokens": 200000, + "max_output_tokens": 32000, + "max_tokens": 32000, + "mode": "chat", + "output_cost_per_token": 7.5e-05, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 159 + }, + "openrouter/anthropic/claude-sonnet-4": { + "input_cost_per_image": 0.0048, + "cache_creation_input_token_cost": 3.75e-06, + "cache_creation_input_token_cost_above_200k_tokens": 7.5e-06, + "cache_read_input_token_cost": 3e-07, + "cache_read_input_token_cost_above_200k_tokens": 6e-07, + "input_cost_per_token": 3e-06, + "input_cost_per_token_above_200k_tokens": 6e-06, + "output_cost_per_token_above_200k_tokens": 2.25e-05, + "litellm_provider": "openrouter", + "max_input_tokens": 1000000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 1.5e-05, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 159 + }, + "openrouter/anthropic/claude-sonnet-4.6": { + "cache_creation_input_token_cost": 3.75e-06, + "cache_creation_input_token_cost_above_200k_tokens": 7.5e-06, + "cache_read_input_token_cost": 3e-07, + "cache_read_input_token_cost_above_200k_tokens": 6e-07, + "input_cost_per_token": 3e-06, + "input_cost_per_token_above_200k_tokens": 6e-06, + "litellm_provider": "openrouter", + "max_input_tokens": 1000000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 1.5e-05, + "output_cost_per_token_above_200k_tokens": 2.25e-05, + "source": "https://openrouter.ai/anthropic/claude-sonnet-4.6", + "supports_assistant_prefill": true, + 
"supports_computer_use": true, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_max_reasoning_effort": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 159, + "supports_minimal_reasoning_effort": true + }, + "openrouter/anthropic/claude-opus-4.5": { + "cache_creation_input_token_cost": 6.25e-06, + "cache_read_input_token_cost": 5e-07, + "input_cost_per_token": 5e-06, + "litellm_provider": "openrouter", + "max_input_tokens": 200000, + "max_output_tokens": 32000, + "max_tokens": 32000, + "mode": "chat", + "output_cost_per_token": 2.5e-05, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_minimal_reasoning_effort": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 159 + }, + "openrouter/anthropic/claude-opus-4.6": { + "cache_creation_input_token_cost": 6.25e-06, + "cache_read_input_token_cost": 5e-07, + "input_cost_per_token": 5e-06, + "litellm_provider": "openrouter", + "max_input_tokens": 1000000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 2.5e-05, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_max_reasoning_effort": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 346, + "supports_minimal_reasoning_effort": true + }, + "openrouter/anthropic/claude-sonnet-4.5": { + "input_cost_per_image": 0.0048, + "cache_creation_input_token_cost": 3.75e-06, + "cache_read_input_token_cost": 3e-07, + "input_cost_per_token": 3e-06, + "input_cost_per_token_above_200k_tokens": 6e-06, + "output_cost_per_token_above_200k_tokens": 2.25e-05, + 
"cache_creation_input_token_cost_above_200k_tokens": 7.5e-06, + "cache_read_input_token_cost_above_200k_tokens": 6e-07, + "litellm_provider": "openrouter", + "max_input_tokens": 1000000, + "max_output_tokens": 1000000, + "max_tokens": 1000000, + "mode": "chat", + "output_cost_per_token": 1.5e-05, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 159 + }, + "openrouter/anthropic/claude-haiku-4.5": { + "cache_creation_input_token_cost": 1.25e-06, + "cache_read_input_token_cost": 1e-07, + "input_cost_per_token": 1e-06, + "litellm_provider": "openrouter", + "max_input_tokens": 200000, + "max_output_tokens": 200000, + "max_tokens": 200000, + "mode": "chat", + "output_cost_per_token": 5e-06, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 346 + }, + "openrouter/anthropic/claude-opus-4.7": { + "cache_creation_input_token_cost": 6.25e-06, + "cache_read_input_token_cost": 5e-07, + "input_cost_per_token": 5e-06, + "litellm_provider": "openrouter", + "max_input_tokens": 1000000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 2.5e-05, + "supports_assistant_prefill": false, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_max_reasoning_effort": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_xhigh_reasoning_effort": true, + "tool_use_system_prompt_tokens": 346 + }, + "openrouter/bytedance/ui-tars-1.5-7b": { + 
"input_cost_per_token": 1e-07, + "litellm_provider": "openrouter", + "max_input_tokens": 131072, + "max_output_tokens": 2048, + "max_tokens": 2048, + "mode": "chat", + "output_cost_per_token": 2e-07, + "source": "https://openrouter.ai/api/v1/models/bytedance/ui-tars-1.5-7b", + "supports_tool_choice": true + }, + "openrouter/deepseek/deepseek-chat": { + "input_cost_per_token": 1.4e-07, + "litellm_provider": "openrouter", + "max_input_tokens": 65536, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 2.8e-07, + "supports_prompt_caching": true, + "supports_tool_choice": true + }, + "openrouter/deepseek/deepseek-chat-v3-0324": { + "input_cost_per_token": 1.4e-07, + "litellm_provider": "openrouter", + "max_input_tokens": 65536, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 2.8e-07, + "supports_prompt_caching": true, + "supports_tool_choice": true + }, + "openrouter/deepseek/deepseek-chat-v3.1": { + "input_cost_per_token": 2e-07, + "input_cost_per_token_cache_hit": 2e-08, + "litellm_provider": "openrouter", + "max_input_tokens": 163840, + "max_output_tokens": 163840, + "max_tokens": 163840, + "mode": "chat", + "output_cost_per_token": 8e-07, + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, + "openrouter/deepseek/deepseek-v3.2": { + "input_cost_per_token": 2.8e-07, + "input_cost_per_token_cache_hit": 2.8e-08, + "litellm_provider": "openrouter", + "max_input_tokens": 163840, + "max_output_tokens": 163840, + "max_tokens": 163840, + "mode": "chat", + "output_cost_per_token": 4e-07, + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, + "openrouter/deepseek/deepseek-v3.2-exp": { + "input_cost_per_token": 2e-07, + 
"input_cost_per_token_cache_hit": 2e-08, + "litellm_provider": "openrouter", + "max_input_tokens": 163840, + "max_output_tokens": 163840, + "max_tokens": 163840, + "mode": "chat", + "output_cost_per_token": 4e-07, + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": false, + "supports_tool_choice": true + }, + "openrouter/deepseek/deepseek-r1": { + "input_cost_per_token": 5.5e-07, + "input_cost_per_token_cache_hit": 1.4e-07, + "litellm_provider": "openrouter", + "max_input_tokens": 65336, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 2.19e-06, + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, + "openrouter/deepseek/deepseek-r1-0528": { + "input_cost_per_token": 5e-07, + "input_cost_per_token_cache_hit": 1.4e-07, + "litellm_provider": "openrouter", + "max_input_tokens": 65336, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 2.15e-06, + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, + "openrouter/google/gemini-2.0-flash-001": { + "deprecation_date": "2026-06-01", + "input_cost_per_audio_token": 7e-07, + "input_cost_per_token": 1e-07, + "litellm_provider": "openrouter", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 8192, + "max_pdf_size_mb": 30, + "max_tokens": 8192, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_token": 4e-07, + "supports_audio_output": true, + "supports_function_calling": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, 
+ "supports_vision": true + }, + "openrouter/google/gemini-2.5-flash": { + "input_cost_per_audio_token": 7e-07, + "input_cost_per_token": 3e-07, + "litellm_provider": "openrouter", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 8192, + "max_pdf_size_mb": 30, + "max_tokens": 8192, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_token": 2.5e-06, + "supports_audio_output": true, + "supports_function_calling": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "openrouter/google/gemini-2.5-pro": { + "input_cost_per_audio_token": 7e-07, + "input_cost_per_token": 1.25e-06, + "litellm_provider": "openrouter", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 8192, + "max_pdf_size_mb": 30, + "max_tokens": 8192, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_token": 1e-05, + "supports_audio_output": true, + "supports_function_calling": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "openrouter/google/gemini-3-pro-preview": { + "cache_read_input_token_cost": 2e-07, + "cache_read_input_token_cost_above_200k_tokens": 4e-07, + "cache_creation_input_token_cost_above_200k_tokens": 2.5e-07, + "input_cost_per_token": 2e-06, + "input_cost_per_token_above_200k_tokens": 4e-06, + "input_cost_per_token_batches": 1e-06, + "litellm_provider": "openrouter", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 65535, + "max_pdf_size_mb": 30, + "max_tokens": 65535, + "max_video_length": 1, + "max_videos_per_prompt": 10, + 
"mode": "chat", + "output_cost_per_token": 1.2e-05, + "output_cost_per_token_above_200k_tokens": 1.8e-05, + "output_cost_per_token_batches": 6e-06, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], + "supports_audio_input": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_video_input": true, + "supports_vision": true, + "supports_web_search": true + }, + "openrouter/google/gemini-3-flash-preview": { + "cache_read_input_token_cost": 5e-08, + "input_cost_per_audio_token": 1e-06, + "input_cost_per_token": 5e-07, + "litellm_provider": "openrouter", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 65535, + "max_pdf_size_mb": 30, + "max_tokens": 65535, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_reasoning_token": 3e-06, + "output_cost_per_token": 3e-06, + "rpm": 2000, + "source": "https://ai.google.dev/pricing/gemini-3", + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], + "supports_audio_output": false, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_url_context": true, + "supports_vision": true, + "supports_web_search": true, + "tpm": 800000 + }, + 
"openrouter/google/gemini-3.1-flash-lite-preview": { + "cache_read_input_token_cost": 2.5e-08, + "cache_read_input_token_cost_per_audio_token": 5e-08, + "input_cost_per_audio_token": 5e-07, + "input_cost_per_token": 2.5e-07, + "litellm_provider": "openrouter", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 65536, + "max_pdf_size_mb": 30, + "max_tokens": 65536, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_reasoning_token": 1.5e-06, + "output_cost_per_token": 1.5e-06, + "rpm": 2000, + "source": "https://ai.google.dev/pricing/gemini-3", + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], + "supports_audio_input": true, + "supports_audio_output": false, + "supports_code_execution": true, + "supports_file_search": true, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_url_context": true, + "supports_video_input": true, + "supports_vision": true, + "supports_web_search": true, + "tpm": 800000 + }, + "openrouter/google/gemini-3.1-pro-preview": { + "cache_read_input_token_cost": 2e-07, + "cache_read_input_token_cost_above_200k_tokens": 4e-07, + "cache_creation_input_token_cost_above_200k_tokens": 2.5e-07, + "input_cost_per_token": 2e-06, + "input_cost_per_token_above_200k_tokens": 4e-06, + "litellm_provider": "openrouter", + "max_input_tokens": 1048576, + "max_output_tokens": 65536, + "max_tokens": 65536, + "mode": "chat", + "output_cost_per_token": 1.2e-05, + "output_cost_per_token_above_200k_tokens": 1.8e-05, + "source": 
"https://openrouter.ai/google/gemini-3.1-pro-preview", + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], + "supports_audio_input": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "openrouter/gryphe/mythomax-l2-13b": { + "input_cost_per_token": 1.875e-06, + "litellm_provider": "openrouter", + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 1.875e-06, + "supports_tool_choice": true + }, + "openrouter/mancer/weaver": { + "input_cost_per_token": 5.625e-06, + "litellm_provider": "openrouter", + "max_tokens": 2000, + "mode": "chat", + "output_cost_per_token": 5.625e-06, + "supports_tool_choice": true, + "max_input_tokens": 8000, + "max_output_tokens": 2000 + }, + "openrouter/meta-llama/llama-3-70b-instruct": { + "input_cost_per_token": 5.9e-07, + "litellm_provider": "openrouter", + "max_tokens": 8000, + "mode": "chat", + "output_cost_per_token": 7.9e-07, + "supports_tool_choice": true, + "max_input_tokens": 8192, + "max_output_tokens": 8000 + }, + "openrouter/minimax/minimax-m2": { + "input_cost_per_token": 2.55e-07, + "litellm_provider": "openrouter", + "max_input_tokens": 204800, + "max_output_tokens": 204800, + "max_tokens": 204800, + "mode": "chat", + "output_cost_per_token": 1.02e-06, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, + "openrouter/mistralai/devstral-2512": { + "input_cost_per_image": 0, + "input_cost_per_token": 1.5e-07, + "litellm_provider": "openrouter", + "max_input_tokens": 262144, + "max_output_tokens": 65536, + "max_tokens": 65536, + "mode": "chat", + "output_cost_per_token": 6e-07, + "supports_function_calling": true, + 
"supports_prompt_caching": false, + "supports_tool_choice": true, + "supports_vision": false + }, + "openrouter/mistralai/ministral-3b-2512": { + "input_cost_per_image": 0, + "input_cost_per_token": 1e-07, + "litellm_provider": "openrouter", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 1e-07, + "supports_function_calling": true, + "supports_prompt_caching": false, + "supports_tool_choice": true, + "supports_vision": true + }, + "openrouter/mistralai/ministral-8b-2512": { + "input_cost_per_image": 0, + "input_cost_per_token": 1.5e-07, + "litellm_provider": "openrouter", + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "max_tokens": 262144, + "mode": "chat", + "output_cost_per_token": 1.5e-07, + "supports_function_calling": true, + "supports_prompt_caching": false, + "supports_tool_choice": true, + "supports_vision": true + }, + "openrouter/mistralai/ministral-14b-2512": { + "input_cost_per_image": 0, + "input_cost_per_token": 2e-07, + "litellm_provider": "openrouter", + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "max_tokens": 262144, + "mode": "chat", + "output_cost_per_token": 2e-07, + "supports_function_calling": true, + "supports_prompt_caching": false, + "supports_tool_choice": true, + "supports_vision": true + }, + "openrouter/mistralai/mistral-large-2512": { + "input_cost_per_image": 0, + "input_cost_per_token": 5e-07, + "litellm_provider": "openrouter", + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "max_tokens": 262144, + "mode": "chat", + "output_cost_per_token": 1.5e-06, + "supports_function_calling": true, + "supports_prompt_caching": false, + "supports_tool_choice": true, + "supports_vision": true + }, + "openrouter/mistralai/mistral-7b-instruct": { + "input_cost_per_token": 1.3e-07, + "litellm_provider": "openrouter", + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_token": 1.3e-07, + "supports_tool_choice": 
true, + "max_input_tokens": 32768, + "max_output_tokens": 8191 + }, + "openrouter/mistralai/mistral-large": { + "input_cost_per_token": 8e-06, + "litellm_provider": "openrouter", + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_token": 2.4e-05, + "supports_tool_choice": true, + "max_input_tokens": 128000, + "max_output_tokens": 8191 + }, + "openrouter/mistralai/mistral-small-3.1-24b-instruct": { + "input_cost_per_token": 1e-07, + "litellm_provider": "openrouter", + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 3e-07, + "supports_tool_choice": true, + "max_input_tokens": 131072, + "max_output_tokens": 131072 + }, + "openrouter/mistralai/mistral-small-3.2-24b-instruct": { + "input_cost_per_token": 1e-07, + "litellm_provider": "openrouter", + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 3e-07, + "supports_tool_choice": true, + "max_input_tokens": 128000, + "max_output_tokens": 128000 + }, + "openrouter/mistralai/mixtral-8x22b-instruct": { + "input_cost_per_token": 6.5e-07, + "litellm_provider": "openrouter", + "max_tokens": 65536, + "mode": "chat", + "output_cost_per_token": 6.5e-07, + "supports_tool_choice": true, + "max_input_tokens": 65536, + "max_output_tokens": 65536 + }, + "openrouter/moonshotai/kimi-k2.5": { + "cache_read_input_token_cost": 1e-07, + "input_cost_per_token": 6e-07, + "litellm_provider": "openrouter", + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "max_tokens": 262144, + "mode": "chat", + "output_cost_per_token": 3e-06, + "source": "https://openrouter.ai/moonshotai/kimi-k2.5", + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_video_input": true, + "supports_vision": true + }, + "openrouter/openai/gpt-3.5-turbo": { + "input_cost_per_token": 1.5e-06, + "litellm_provider": "openrouter", + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 2e-06, + "supports_tool_choice": true, + "max_input_tokens": 16385, + "max_output_tokens": 4096 + }, 
+ "openrouter/openai/gpt-3.5-turbo-16k": { + "input_cost_per_token": 3e-06, + "litellm_provider": "openrouter", + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 4e-06, + "supports_tool_choice": true, + "max_input_tokens": 16385, + "max_output_tokens": 4096 + }, + "openrouter/openai/gpt-4": { + "input_cost_per_token": 3e-05, + "litellm_provider": "openrouter", + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 6e-05, + "supports_tool_choice": true, + "max_input_tokens": 8191, + "max_output_tokens": 4096 + }, + "openrouter/openai/gpt-4.1": { + "cache_read_input_token_cost": 5e-07, + "input_cost_per_token": 2e-06, + "litellm_provider": "openrouter", + "max_input_tokens": 1047576, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 8e-06, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "openrouter/openai/gpt-4.1-mini": { + "cache_read_input_token_cost": 1e-07, + "input_cost_per_token": 4e-07, + "litellm_provider": "openrouter", + "max_input_tokens": 1047576, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 1.6e-06, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "openrouter/openai/gpt-4.1-nano": { + "cache_read_input_token_cost": 2.5e-08, + "input_cost_per_token": 1e-07, + "litellm_provider": "openrouter", + "max_input_tokens": 1047576, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 4e-07, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + 
"supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "openrouter/openai/gpt-4o": { + "input_cost_per_token": 2.5e-06, + "litellm_provider": "openrouter", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 1e-05, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "openrouter/openai/gpt-4o-2024-05-13": { + "input_cost_per_token": 5e-06, + "litellm_provider": "openrouter", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 1.5e-05, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "openrouter/openai/gpt-5-chat": { + "cache_read_input_token_cost": 1.25e-07, + "input_cost_per_token": 1.25e-06, + "litellm_provider": "openrouter", + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 1e-05, + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_reasoning": true, + "supports_tool_choice": true + }, + "openrouter/openai/gpt-5-codex": { + "cache_read_input_token_cost": 1.25e-07, + "input_cost_per_token": 1.25e-06, + "litellm_provider": "openrouter", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 1e-05, + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_reasoning": true, + "supports_tool_choice": true + }, + "openrouter/openai/gpt-5.2-codex": { + "cache_read_input_token_cost": 1.75e-07, + "input_cost_per_token": 1.75e-06, + "litellm_provider": 
"openrouter", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 1.4e-05, + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_reasoning": true, + "supports_tool_choice": true + }, + "openrouter/openai/gpt-5": { + "cache_read_input_token_cost": 1.25e-07, + "input_cost_per_token": 1.25e-06, + "litellm_provider": "openrouter", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 1e-05, + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_reasoning": true, + "supports_tool_choice": true + }, + "openrouter/openai/gpt-5-mini": { + "cache_read_input_token_cost": 2.5e-08, + "input_cost_per_token": 2.5e-07, + "litellm_provider": "openrouter", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 2e-06, + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_reasoning": true, + "supports_tool_choice": true + }, + "openrouter/openai/gpt-5-nano": { + "cache_read_input_token_cost": 5e-09, + "input_cost_per_token": 5e-08, + "litellm_provider": "openrouter", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 4e-07, + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_reasoning": true, + "supports_tool_choice": true + }, + "openrouter/openai/gpt-5.1-codex-max": { + "cache_read_input_token_cost": 1.25e-07, + "input_cost_per_token": 1.25e-06, + "litellm_provider": "openrouter", + "max_input_tokens": 400000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 1e-05, + "source": 
"https://openrouter.ai/openai/gpt-5.1-codex-max", + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "openrouter/openai/gpt-5.2": { + "input_cost_per_image": 0, + "cache_read_input_token_cost": 1.75e-07, + "input_cost_per_token": 1.75e-06, + "litellm_provider": "openrouter", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 1.4e-05, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "openrouter/openai/gpt-5.2-chat": { + "input_cost_per_image": 0, + "cache_read_input_token_cost": 1.75e-07, + "input_cost_per_token": 1.75e-06, + "litellm_provider": "openrouter", + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 1.4e-05, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "openrouter/openai/gpt-5.2-pro": { + "input_cost_per_image": 0, + "input_cost_per_token": 2.1e-05, + "litellm_provider": "openrouter", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 0.000168, + "supports_function_calling": true, + "supports_prompt_caching": false, + "supports_reasoning": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "openrouter/openai/gpt-oss-120b": { + "input_cost_per_token": 1.8e-07, + "litellm_provider": "openrouter", + "max_input_tokens": 131072, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 8e-07, + "source": "https://openrouter.ai/openai/gpt-oss-120b", + 
"supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "openrouter/openai/gpt-oss-20b": { + "input_cost_per_token": 2e-08, + "litellm_provider": "openrouter", + "max_input_tokens": 131072, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 1e-07, + "source": "https://openrouter.ai/openai/gpt-oss-20b", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "openrouter/openai/o1": { + "cache_read_input_token_cost": 7.5e-06, + "input_cost_per_token": 1.5e-05, + "litellm_provider": "openrouter", + "max_input_tokens": 200000, + "max_output_tokens": 100000, + "max_tokens": 100000, + "mode": "chat", + "output_cost_per_token": 6e-05, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "openrouter/openai/o3-mini": { + "input_cost_per_token": 1.1e-06, + "litellm_provider": "openrouter", + "max_input_tokens": 128000, + "max_output_tokens": 65536, + "max_tokens": 65536, + "mode": "chat", + "output_cost_per_token": 4.4e-06, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true, + "supports_vision": false + }, + "openrouter/openai/o3-mini-high": { + "input_cost_per_token": 1.1e-06, + "litellm_provider": "openrouter", + "max_input_tokens": 128000, + "max_output_tokens": 65536, + "max_tokens": 65536, + "mode": "chat", + "output_cost_per_token": 4.4e-06, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": 
true, + "supports_vision": false + }, + "openrouter/qwen/qwen-2.5-coder-32b-instruct": { + "input_cost_per_token": 1.8e-07, + "litellm_provider": "openrouter", + "max_input_tokens": 33792, + "max_output_tokens": 33792, + "max_tokens": 33792, + "mode": "chat", + "output_cost_per_token": 1.8e-07, + "supports_tool_choice": true + }, + "openrouter/qwen/qwen-vl-plus": { + "input_cost_per_token": 2.1e-07, + "litellm_provider": "openrouter", + "max_input_tokens": 8192, + "max_output_tokens": 2048, + "max_tokens": 2048, + "mode": "chat", + "output_cost_per_token": 6.3e-07, + "supports_tool_choice": true, + "supports_vision": true + }, + "openrouter/qwen/qwen3-coder": { + "input_cost_per_token": 2.2e-07, + "litellm_provider": "openrouter", + "max_input_tokens": 262100, + "max_output_tokens": 262100, + "max_tokens": 262100, + "mode": "chat", + "output_cost_per_token": 9.5e-07, + "source": "https://openrouter.ai/qwen/qwen3-coder", + "supports_tool_choice": true, + "supports_function_calling": true + }, + "openrouter/qwen/qwen3-coder-plus": { + "input_cost_per_token": 1e-06, + "litellm_provider": "openrouter", + "max_input_tokens": 997952, + "max_output_tokens": 65536, + "max_tokens": 65536, + "mode": "chat", + "output_cost_per_token": 5e-06, + "source": "https://openrouter.ai/qwen/qwen3-coder-plus", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, + "openrouter/qwen/qwen3-235b-a22b-2507": { + "input_cost_per_token": 7.1e-08, + "litellm_provider": "openrouter", + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "max_tokens": 262144, + "mode": "chat", + "output_cost_per_token": 1e-07, + "source": "https://openrouter.ai/qwen/qwen3-235b-a22b-2507", + "supports_function_calling": true, + "supports_tool_choice": true + }, + "openrouter/qwen/qwen3-235b-a22b-thinking-2507": { + "input_cost_per_token": 1.1e-07, + "litellm_provider": "openrouter", + "max_input_tokens": 262144, + "max_output_tokens": 262144, + 
"max_tokens": 262144, + "mode": "chat", + "output_cost_per_token": 6e-07, + "source": "https://openrouter.ai/qwen/qwen3-235b-a22b-thinking-2507", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, + "openrouter/qwen/qwen3.6-plus": { + "input_cost_per_token": 3.25e-07, + "litellm_provider": "openrouter", + "max_input_tokens": 1000000, + "max_output_tokens": 65536, + "max_tokens": 65536, + "mode": "chat", + "output_cost_per_token": 1.95e-06, + "source": "https://openrouter.ai/qwen/qwen3.6-plus", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "openrouter/qwen/qwen3.5-35b-a3b": { + "input_cost_per_token": 2.5e-07, + "litellm_provider": "openrouter", + "max_input_tokens": 262144, + "max_output_tokens": 65536, + "max_tokens": 65536, + "mode": "chat", + "output_cost_per_token": 2e-06, + "source": "https://openrouter.ai/qwen/qwen3.5-35b-a3b", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "openrouter/qwen/qwen3.5-27b": { + "input_cost_per_token": 3e-07, + "litellm_provider": "openrouter", + "max_input_tokens": 262144, + "max_output_tokens": 65536, + "max_tokens": 65536, + "mode": "chat", + "output_cost_per_token": 2.4e-06, + "source": "https://openrouter.ai/qwen/qwen3.5-27b", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "openrouter/qwen/qwen3.5-122b-a10b": { + "input_cost_per_token": 4e-07, + "litellm_provider": "openrouter", + "max_input_tokens": 262144, + "max_output_tokens": 65536, + "max_tokens": 65536, + "mode": "chat", + "output_cost_per_token": 2e-06, + "source": "https://openrouter.ai/qwen/qwen3.5-122b-a10b", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true, + "supports_vision": true + }, + 
"openrouter/qwen/qwen3.5-flash-02-23": { + "input_cost_per_token": 1e-07, + "litellm_provider": "openrouter", + "max_input_tokens": 1000000, + "max_output_tokens": 65536, + "max_tokens": 65536, + "mode": "chat", + "output_cost_per_token": 4e-07, + "source": "https://openrouter.ai/qwen/qwen3.5-flash-02-23", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "openrouter/qwen/qwen3.5-plus-02-15": { + "input_cost_per_token": 4e-07, + "input_cost_per_token_above_256k_tokens": 5e-07, + "litellm_provider": "openrouter", + "max_input_tokens": 1000000, + "max_output_tokens": 65536, + "max_tokens": 65536, + "mode": "chat", + "output_cost_per_token": 2.4e-06, + "output_cost_per_token_above_256k_tokens": 3e-06, + "source": "https://openrouter.ai/qwen/qwen3.5-plus-02-15", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "openrouter/qwen/qwen3.5-397b-a17b": { + "input_cost_per_token": 6e-07, + "litellm_provider": "openrouter", + "max_input_tokens": 262144, + "max_output_tokens": 65536, + "max_tokens": 65536, + "mode": "chat", + "output_cost_per_token": 3.6e-06, + "source": "https://openrouter.ai/qwen/qwen3.5-397b-a17b", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "openrouter/switchpoint/router": { + "input_cost_per_token": 8.5e-07, + "litellm_provider": "openrouter", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 3.4e-06, + "source": "https://openrouter.ai/switchpoint/router", + "supports_tool_choice": true + }, + "openrouter/undi95/remm-slerp-l2-13b": { + "input_cost_per_token": 1.875e-06, + "litellm_provider": "openrouter", + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 1.875e-06, + "supports_tool_choice": true, + 
"max_input_tokens": 6144, + "max_output_tokens": 4096 + }, + "openrouter/x-ai/grok-4": { + "input_cost_per_token": 3e-06, + "litellm_provider": "openrouter", + "max_input_tokens": 256000, + "max_output_tokens": 256000, + "max_tokens": 256000, + "mode": "chat", + "output_cost_per_token": 1.5e-05, + "source": "https://openrouter.ai/x-ai/grok-4", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true, + "supports_web_search": true + }, + "openrouter/z-ai/glm-4.6": { + "input_cost_per_token": 4e-07, + "litellm_provider": "openrouter", + "max_input_tokens": 202800, + "max_output_tokens": 131000, + "max_tokens": 131000, + "mode": "chat", + "output_cost_per_token": 1.75e-06, + "source": "https://openrouter.ai/z-ai/glm-4.6", + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, + "openrouter/z-ai/glm-4.6:exacto": { + "input_cost_per_token": 4.5e-07, + "litellm_provider": "openrouter", + "max_input_tokens": 202800, + "max_output_tokens": 131000, + "max_tokens": 131000, + "mode": "chat", + "output_cost_per_token": 1.9e-06, + "source": "https://openrouter.ai/z-ai/glm-4.6:exacto", + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, + "openrouter/xiaomi/mimo-v2-flash": { + "input_cost_per_token": 9e-08, + "output_cost_per_token": 2.9e-07, + "cache_creation_input_token_cost": 0.0, + "cache_read_input_token_cost": 0.0, + "litellm_provider": "openrouter", + "max_input_tokens": 262144, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_reasoning": true, + "supports_vision": false, + "supports_prompt_caching": false + }, + "openrouter/z-ai/glm-4.7": { + "input_cost_per_token": 4e-07, + "output_cost_per_token": 1.5e-06, + "cache_creation_input_token_cost": 0.0, + 
"cache_read_input_token_cost": 0.0, + "litellm_provider": "openrouter", + "max_input_tokens": 202752, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_reasoning": true, + "supports_vision": true, + "supports_prompt_caching": false, + "supports_assistant_prefill": true + }, + "openrouter/z-ai/glm-4.7-flash": { + "input_cost_per_token": 7e-08, + "output_cost_per_token": 4e-07, + "cache_creation_input_token_cost": 0.0, + "cache_read_input_token_cost": 0.0, + "litellm_provider": "openrouter", + "max_input_tokens": 200000, + "max_output_tokens": 32000, + "max_tokens": 32000, + "mode": "chat", + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_reasoning": true, + "supports_vision": true, + "supports_prompt_caching": false + }, + "openrouter/z-ai/glm-5": { + "input_cost_per_token": 8e-07, + "litellm_provider": "openrouter", + "max_input_tokens": 202752, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 2.56e-06, + "source": "https://openrouter.ai/z-ai/glm-5", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, + "openrouter/minimax/minimax-m2.1": { + "input_cost_per_token": 2.7e-07, + "output_cost_per_token": 1.2e-06, + "cache_creation_input_token_cost": 0.0, + "cache_read_input_token_cost": 0.0, + "litellm_provider": "openrouter", + "max_input_tokens": 204000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_reasoning": true, + "supports_vision": true, + "supports_prompt_caching": false, + "supports_computer_use": false + }, + "openrouter/minimax/minimax-m2.5": { + "input_cost_per_token": 3e-07, + "output_cost_per_token": 1.1e-06, + "cache_read_input_token_cost": 1.5e-07, + "litellm_provider": "openrouter", + "max_input_tokens": 196608, + 
"max_output_tokens": 65536, + "max_tokens": 65536, + "mode": "chat", + "source": "https://openrouter.ai/minimax/minimax-m2.5", + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_reasoning": true, + "supports_vision": false, + "supports_prompt_caching": true, + "supports_computer_use": false + }, + "openrouter/openrouter/auto": { + "input_cost_per_token": 0, + "output_cost_per_token": 0, + "litellm_provider": "openrouter", + "max_input_tokens": 2000000, + "max_tokens": 2000000, + "mode": "chat", + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_vision": true, + "supports_audio_input": true, + "supports_video_input": true + }, + "openrouter/openrouter/free": { + "input_cost_per_token": 0, + "output_cost_per_token": 0, + "litellm_provider": "openrouter", + "max_input_tokens": 200000, + "max_tokens": 200000, + "mode": "chat", + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_vision": true + }, + "openrouter/openrouter/bodybuilder": { + "input_cost_per_token": 0, + "output_cost_per_token": 0, + "litellm_provider": "openrouter", + "max_input_tokens": 128000, + "max_tokens": 128000, + "mode": "chat" + }, + "ovhcloud/DeepSeek-R1-Distill-Llama-70B": { + "input_cost_per_token": 6.7e-07, + "litellm_provider": "ovhcloud", + "max_input_tokens": 131000, + "max_output_tokens": 131000, + "max_tokens": 131000, + "mode": "chat", + "output_cost_per_token": 6.7e-07, + "source": "https://endpoints.ai.cloud.ovh.net/models/deepseek-r1-distill-llama-70b", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "ovhcloud/Llama-3.1-8B-Instruct": { + "input_cost_per_token": 1e-07, + "litellm_provider": "ovhcloud", + "max_input_tokens": 131000, + "max_output_tokens": 131000, + 
"max_tokens": 131000, + "mode": "chat", + "output_cost_per_token": 1e-07, + "source": "https://endpoints.ai.cloud.ovh.net/models/llama-3-1-8b-instruct", + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "ovhcloud/Meta-Llama-3_1-70B-Instruct": { + "input_cost_per_token": 6.7e-07, + "litellm_provider": "ovhcloud", + "max_input_tokens": 131000, + "max_output_tokens": 131000, + "max_tokens": 131000, + "mode": "chat", + "output_cost_per_token": 6.7e-07, + "source": "https://endpoints.ai.cloud.ovh.net/models/meta-llama-3-1-70b-instruct", + "supports_function_calling": false, + "supports_response_schema": false, + "supports_tool_choice": false + }, + "ovhcloud/Meta-Llama-3_3-70B-Instruct": { + "input_cost_per_token": 6.7e-07, + "litellm_provider": "ovhcloud", + "max_input_tokens": 131000, + "max_output_tokens": 131000, + "max_tokens": 131000, + "mode": "chat", + "output_cost_per_token": 6.7e-07, + "source": "https://endpoints.ai.cloud.ovh.net/models/meta-llama-3-3-70b-instruct", + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "ovhcloud/Mistral-7B-Instruct-v0.3": { + "input_cost_per_token": 1e-07, + "litellm_provider": "ovhcloud", + "max_input_tokens": 127000, + "max_output_tokens": 127000, + "max_tokens": 127000, + "mode": "chat", + "output_cost_per_token": 1e-07, + "source": "https://endpoints.ai.cloud.ovh.net/models/mistral-7b-instruct-v0-3", + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "ovhcloud/Mistral-Nemo-Instruct-2407": { + "input_cost_per_token": 1.3e-07, + "litellm_provider": "ovhcloud", + "max_input_tokens": 118000, + "max_output_tokens": 118000, + "max_tokens": 118000, + "mode": "chat", + "output_cost_per_token": 1.3e-07, + "source": "https://endpoints.ai.cloud.ovh.net/models/mistral-nemo-instruct-2407", + "supports_function_calling": true, + "supports_response_schema": 
true, + "supports_tool_choice": true + }, + "ovhcloud/Mistral-Small-3.2-24B-Instruct-2506": { + "input_cost_per_token": 9e-08, + "litellm_provider": "ovhcloud", + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 2.8e-07, + "source": "https://endpoints.ai.cloud.ovh.net/models/mistral-small-3-2-24b-instruct-2506", + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "ovhcloud/Mixtral-8x7B-Instruct-v0.1": { + "input_cost_per_token": 6.3e-07, + "litellm_provider": "ovhcloud", + "max_input_tokens": 32000, + "max_output_tokens": 32000, + "max_tokens": 32000, + "mode": "chat", + "output_cost_per_token": 6.3e-07, + "source": "https://endpoints.ai.cloud.ovh.net/models/mixtral-8x7b-instruct-v0-1", + "supports_function_calling": false, + "supports_response_schema": true, + "supports_tool_choice": false + }, + "ovhcloud/Qwen2.5-Coder-32B-Instruct": { + "input_cost_per_token": 8.7e-07, + "litellm_provider": "ovhcloud", + "max_input_tokens": 32000, + "max_output_tokens": 32000, + "max_tokens": 32000, + "mode": "chat", + "output_cost_per_token": 8.7e-07, + "source": "https://endpoints.ai.cloud.ovh.net/models/qwen2-5-coder-32b-instruct", + "supports_function_calling": false, + "supports_response_schema": true, + "supports_tool_choice": false + }, + "ovhcloud/Qwen2.5-VL-72B-Instruct": { + "input_cost_per_token": 9.1e-07, + "litellm_provider": "ovhcloud", + "max_input_tokens": 32000, + "max_output_tokens": 32000, + "max_tokens": 32000, + "mode": "chat", + "output_cost_per_token": 9.1e-07, + "source": "https://endpoints.ai.cloud.ovh.net/models/qwen2-5-vl-72b-instruct", + "supports_function_calling": false, + "supports_response_schema": true, + "supports_tool_choice": false, + "supports_vision": true + }, + "ovhcloud/Qwen3-32B": { + "input_cost_per_token": 8e-08, + "litellm_provider": "ovhcloud", + "max_input_tokens": 
32000, + "max_output_tokens": 32000, + "max_tokens": 32000, + "mode": "chat", + "output_cost_per_token": 2.3e-07, + "source": "https://endpoints.ai.cloud.ovh.net/models/qwen3-32b", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "ovhcloud/gpt-oss-120b": { + "input_cost_per_token": 8e-08, + "litellm_provider": "ovhcloud", + "max_input_tokens": 131000, + "max_output_tokens": 131000, + "max_tokens": 131000, + "mode": "chat", + "output_cost_per_token": 4e-07, + "source": "https://endpoints.ai.cloud.ovh.net/models/gpt-oss-120b", + "supports_function_calling": false, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": false + }, + "ovhcloud/gpt-oss-20b": { + "input_cost_per_token": 4e-08, + "litellm_provider": "ovhcloud", + "max_input_tokens": 131000, + "max_output_tokens": 131000, + "max_tokens": 131000, + "mode": "chat", + "output_cost_per_token": 1.5e-07, + "source": "https://endpoints.ai.cloud.ovh.net/models/gpt-oss-20b", + "supports_function_calling": false, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": false + }, + "ovhcloud/llava-v1.6-mistral-7b-hf": { + "input_cost_per_token": 2.9e-07, + "litellm_provider": "ovhcloud", + "max_input_tokens": 32000, + "max_output_tokens": 32000, + "max_tokens": 32000, + "mode": "chat", + "output_cost_per_token": 2.9e-07, + "source": "https://endpoints.ai.cloud.ovh.net/models/llava-next-mistral-7b", + "supports_function_calling": false, + "supports_response_schema": true, + "supports_tool_choice": false, + "supports_vision": true + }, + "ovhcloud/mamba-codestral-7B-v0.1": { + "input_cost_per_token": 1.9e-07, + "litellm_provider": "ovhcloud", + "max_input_tokens": 256000, + "max_output_tokens": 256000, + "max_tokens": 256000, + "mode": "chat", + "output_cost_per_token": 1.9e-07, + "source": "https://endpoints.ai.cloud.ovh.net/models/mamba-codestral-7b-v0-1", 
+ "supports_function_calling": false, + "supports_response_schema": true, + "supports_tool_choice": false + }, + "palm/chat-bison": { + "input_cost_per_token": 1.25e-07, + "litellm_provider": "palm", + "max_input_tokens": 8192, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 1.25e-07, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, + "palm/chat-bison-001": { + "input_cost_per_token": 1.25e-07, + "litellm_provider": "palm", + "max_input_tokens": 8192, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 1.25e-07, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, + "palm/text-bison": { + "input_cost_per_token": 1.25e-07, + "litellm_provider": "palm", + "max_input_tokens": 8192, + "max_output_tokens": 1024, + "max_tokens": 1024, + "mode": "completion", + "output_cost_per_token": 1.25e-07, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, + "palm/text-bison-001": { + "input_cost_per_token": 1.25e-07, + "litellm_provider": "palm", + "max_input_tokens": 8192, + "max_output_tokens": 1024, + "max_tokens": 1024, + "mode": "completion", + "output_cost_per_token": 1.25e-07, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, + "palm/text-bison-safety-off": { + "input_cost_per_token": 1.25e-07, + "litellm_provider": "palm", + "max_input_tokens": 8192, + "max_output_tokens": 1024, + "max_tokens": 1024, + "mode": "completion", + "output_cost_per_token": 1.25e-07, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, + "palm/text-bison-safety-recitation-off": { + "input_cost_per_token": 1.25e-07, + "litellm_provider": "palm", + "max_input_tokens": 8192, + "max_output_tokens": 1024, + "max_tokens": 1024, + "mode": "completion", + 
"output_cost_per_token": 1.25e-07, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, + "parallel_ai/search": { + "input_cost_per_query": 0.004, + "litellm_provider": "parallel_ai", + "mode": "search" + }, + "parallel_ai/search-pro": { + "input_cost_per_query": 0.009, + "litellm_provider": "parallel_ai", + "mode": "search" + }, + "perplexity/codellama-34b-instruct": { + "input_cost_per_token": 3.5e-07, + "litellm_provider": "perplexity", + "max_input_tokens": 16384, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 1.4e-06 + }, + "perplexity/codellama-70b-instruct": { + "input_cost_per_token": 7e-07, + "litellm_provider": "perplexity", + "max_input_tokens": 16384, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 2.8e-06 + }, + "perplexity/llama-2-70b-chat": { + "input_cost_per_token": 7e-07, + "litellm_provider": "perplexity", + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 2.8e-06 + }, + "perplexity/llama-3.1-70b-instruct": { + "input_cost_per_token": 1e-06, + "litellm_provider": "perplexity", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 1e-06 + }, + "perplexity/llama-3.1-8b-instruct": { + "input_cost_per_token": 2e-07, + "litellm_provider": "perplexity", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 2e-07 + }, + "perplexity/mistral-7b-instruct": { + "input_cost_per_token": 7e-08, + "litellm_provider": "perplexity", + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 2.8e-07 + }, + "perplexity/mixtral-8x7b-instruct": { + "input_cost_per_token": 7e-08, + "litellm_provider": "perplexity", + 
"max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 2.8e-07 + }, + "perplexity/pplx-70b-chat": { + "input_cost_per_token": 7e-07, + "litellm_provider": "perplexity", + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 2.8e-06 + }, + "perplexity/pplx-70b-online": { + "input_cost_per_request": 0.005, + "input_cost_per_token": 0.0, + "litellm_provider": "perplexity", + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 2.8e-06 + }, + "perplexity/pplx-7b-chat": { + "input_cost_per_token": 7e-08, + "litellm_provider": "perplexity", + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 2.8e-07 + }, + "perplexity/pplx-7b-online": { + "input_cost_per_request": 0.005, + "input_cost_per_token": 0.0, + "litellm_provider": "perplexity", + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 2.8e-07 + }, + "perplexity/sonar": { + "input_cost_per_token": 1e-06, + "litellm_provider": "perplexity", + "max_input_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 1e-06, + "search_context_cost_per_query": { + "search_context_size_high": 0.012, + "search_context_size_low": 0.005, + "search_context_size_medium": 0.008 + }, + "supports_web_search": true + }, + "perplexity/sonar-deep-research": { + "citation_cost_per_token": 2e-06, + "input_cost_per_token": 2e-06, + "litellm_provider": "perplexity", + "max_input_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_reasoning_token": 3e-06, + "output_cost_per_token": 8e-06, + "search_context_cost_per_query": { + "search_context_size_high": 0.005, + "search_context_size_low": 0.005, + "search_context_size_medium": 0.005 + }, + 
"supports_reasoning": true, + "supports_web_search": true + }, + "perplexity/sonar-medium-chat": { + "input_cost_per_token": 6e-07, + "litellm_provider": "perplexity", + "max_input_tokens": 16384, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 1.8e-06 + }, + "perplexity/sonar-medium-online": { + "input_cost_per_request": 0.005, + "input_cost_per_token": 0, + "litellm_provider": "perplexity", + "max_input_tokens": 12000, + "max_output_tokens": 12000, + "max_tokens": 12000, + "mode": "chat", + "output_cost_per_token": 1.8e-06 + }, + "perplexity/sonar-pro": { + "input_cost_per_token": 3e-06, + "litellm_provider": "perplexity", + "max_input_tokens": 200000, + "max_output_tokens": 8000, + "max_tokens": 8000, + "mode": "chat", + "output_cost_per_token": 1.5e-05, + "search_context_cost_per_query": { + "search_context_size_high": 0.014, + "search_context_size_low": 0.006, + "search_context_size_medium": 0.01 + }, + "supports_web_search": true + }, + "perplexity/sonar-reasoning": { + "input_cost_per_token": 1e-06, + "litellm_provider": "perplexity", + "max_input_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 5e-06, + "search_context_cost_per_query": { + "search_context_size_high": 0.014, + "search_context_size_low": 0.005, + "search_context_size_medium": 0.008 + }, + "supports_reasoning": true, + "supports_web_search": true + }, + "perplexity/sonar-reasoning-pro": { + "input_cost_per_token": 2e-06, + "litellm_provider": "perplexity", + "max_input_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 8e-06, + "search_context_cost_per_query": { + "search_context_size_high": 0.014, + "search_context_size_low": 0.006, + "search_context_size_medium": 0.01 + }, + "supports_reasoning": true, + "supports_web_search": true + }, + "perplexity/sonar-small-chat": { + "input_cost_per_token": 7e-08, + "litellm_provider": "perplexity", + "max_input_tokens": 16384, + 
"max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 2.8e-07 + }, + "perplexity/sonar-small-online": { + "input_cost_per_request": 0.005, + "input_cost_per_token": 0, + "litellm_provider": "perplexity", + "max_input_tokens": 12000, + "max_output_tokens": 12000, + "max_tokens": 12000, + "mode": "chat", + "output_cost_per_token": 2.8e-07 + }, + "publicai/swiss-ai/apertus-8b-instruct": { + "input_cost_per_token": 0.0, + "litellm_provider": "publicai", + "max_input_tokens": 8192, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 0.0, + "source": "https://platform.publicai.co/docs", + "supports_function_calling": false, + "supports_tool_choice": false + }, + "publicai/swiss-ai/apertus-70b-instruct": { + "input_cost_per_token": 0.0, + "litellm_provider": "publicai", + "max_input_tokens": 8192, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 0.0, + "source": "https://platform.publicai.co/docs", + "supports_function_calling": false, + "supports_tool_choice": false + }, + "publicai/aisingapore/Gemma-SEA-LION-v4-27B-IT": { + "input_cost_per_token": 0.0, + "litellm_provider": "publicai", + "max_input_tokens": 8192, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 0.0, + "source": "https://platform.publicai.co/docs", + "supports_function_calling": true, + "supports_tool_choice": true + }, + "publicai/BSC-LT/salamandra-7b-instruct-tools-16k": { + "input_cost_per_token": 0.0, + "litellm_provider": "publicai", + "max_input_tokens": 16384, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 0.0, + "source": "https://platform.publicai.co/docs", + "supports_function_calling": true, + "supports_tool_choice": true + }, + "publicai/BSC-LT/ALIA-40b-instruct_Q8_0": { + "input_cost_per_token": 0.0, + "litellm_provider": "publicai", + "max_input_tokens": 8192, + 
"max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 0.0, + "source": "https://platform.publicai.co/docs", + "supports_function_calling": true, + "supports_tool_choice": true + }, + "publicai/allenai/Olmo-3-7B-Instruct": { + "input_cost_per_token": 0.0, + "litellm_provider": "publicai", + "max_input_tokens": 32768, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 0.0, + "source": "https://platform.publicai.co/docs", + "supports_function_calling": true, + "supports_tool_choice": true + }, + "perplexity/preset/fast-search": { + "litellm_provider": "perplexity", + "mode": "responses", + "supports_web_search": true, + "supports_preset": true, + "supports_function_calling": true + }, + "perplexity/preset/pro-search": { + "litellm_provider": "perplexity", + "mode": "responses", + "supports_web_search": true, + "supports_preset": true, + "supports_function_calling": true + }, + "perplexity/preset/deep-research": { + "litellm_provider": "perplexity", + "mode": "responses", + "supports_web_search": true, + "supports_preset": true, + "supports_function_calling": true + }, + "perplexity/preset/advanced-deep-research": { + "litellm_provider": "perplexity", + "mode": "responses", + "supports_web_search": true, + "supports_preset": true, + "supports_function_calling": true + }, + "perplexity/openai/gpt-5.2": { + "litellm_provider": "perplexity", + "mode": "responses", + "supports_web_search": true, + "supports_reasoning": true, + "supports_function_calling": true + }, + "perplexity/openai/gpt-5.1": { + "litellm_provider": "perplexity", + "mode": "responses", + "supports_web_search": true, + "supports_reasoning": false, + "supports_function_calling": true + }, + "perplexity/openai/gpt-5-mini": { + "litellm_provider": "perplexity", + "mode": "responses", + "supports_web_search": true, + "supports_reasoning": false, + "supports_function_calling": true + }, + 
"perplexity/anthropic/claude-opus-4-6": { + "litellm_provider": "perplexity", + "mode": "responses", + "supports_web_search": true, + "supports_reasoning": false, + "supports_function_calling": true + }, + "perplexity/anthropic/claude-opus-4-7": { + "litellm_provider": "perplexity", + "mode": "responses", + "supports_web_search": true, + "supports_reasoning": false, + "supports_function_calling": true + }, + "perplexity/anthropic/claude-opus-4-5": { + "litellm_provider": "perplexity", + "mode": "responses", + "supports_web_search": true, + "supports_reasoning": false, + "supports_function_calling": true, + "supports_minimal_reasoning_effort": true + }, + "perplexity/anthropic/claude-sonnet-4-5": { + "litellm_provider": "perplexity", + "mode": "responses", + "supports_web_search": true, + "supports_reasoning": false, + "supports_function_calling": true + }, + "perplexity/anthropic/claude-haiku-4-5": { + "litellm_provider": "perplexity", + "mode": "responses", + "supports_web_search": true, + "supports_reasoning": false, + "supports_function_calling": true + }, + "perplexity/google/gemini-3-pro-preview": { + "litellm_provider": "perplexity", + "mode": "responses", + "supports_web_search": true, + "supports_reasoning": false, + "supports_function_calling": true + }, + "perplexity/google/gemini-3-flash-preview": { + "litellm_provider": "perplexity", + "mode": "responses", + "supports_web_search": true, + "supports_reasoning": false, + "supports_function_calling": true + }, + "perplexity/google/gemini-2.5-pro": { + "litellm_provider": "perplexity", + "mode": "responses", + "supports_web_search": true, + "supports_reasoning": false, + "supports_function_calling": true + }, + "perplexity/google/gemini-2.5-flash": { + "litellm_provider": "perplexity", + "mode": "responses", + "supports_web_search": true, + "supports_reasoning": false, + "supports_function_calling": true + }, + "perplexity/xai/grok-4-1-fast-non-reasoning": { + "litellm_provider": "perplexity", + "mode": 
"responses", + "supports_web_search": true, + "supports_reasoning": false, + "supports_function_calling": true + }, + "perplexity/perplexity/sonar": { + "litellm_provider": "perplexity", + "mode": "responses", + "supports_web_search": true, + "supports_reasoning": false, + "supports_function_calling": true + }, + "perplexity/pplx-embed-v1-0.6b": { + "input_cost_per_token": 4e-09, + "litellm_provider": "perplexity", + "max_input_tokens": 32768, + "max_tokens": 32768, + "mode": "embedding", + "output_cost_per_token": 0.0, + "output_vector_size": 1024, + "source": "https://docs.perplexity.ai/docs/embeddings/quickstart" + }, + "perplexity/pplx-embed-v1-4b": { + "input_cost_per_token": 3e-08, + "litellm_provider": "perplexity", + "max_input_tokens": 32768, + "max_tokens": 32768, + "mode": "embedding", + "output_cost_per_token": 0.0, + "output_vector_size": 2560, + "source": "https://docs.perplexity.ai/docs/embeddings/quickstart" + }, + "publicai/aisingapore/Qwen-SEA-LION-v4-32B-IT": { + "input_cost_per_token": 0.0, + "litellm_provider": "publicai", + "max_input_tokens": 32768, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 0.0, + "source": "https://platform.publicai.co/docs", + "supports_function_calling": true, + "supports_tool_choice": true + }, + "publicai/allenai/Olmo-3-7B-Think": { + "input_cost_per_token": 0.0, + "litellm_provider": "publicai", + "max_input_tokens": 32768, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 0.0, + "source": "https://platform.publicai.co/docs", + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_reasoning": true + }, + "publicai/allenai/Olmo-3-32B-Think": { + "input_cost_per_token": 0.0, + "litellm_provider": "publicai", + "max_input_tokens": 32768, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 0.0, + "source": "https://platform.publicai.co/docs", + 
"supports_function_calling": true, + "supports_tool_choice": true, + "supports_reasoning": true + }, + "qwen.qwen3-coder-480b-a35b-v1:0": { + "input_cost_per_token": 2.2e-07, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 262000, + "max_output_tokens": 65536, + "max_tokens": 65536, + "mode": "chat", + "output_cost_per_token": 1.8e-06, + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true, + "supports_native_structured_output": true + }, + "qwen.qwen3-235b-a22b-2507-v1:0": { + "input_cost_per_token": 2.2e-07, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 262144, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 8.8e-07, + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true, + "supports_native_structured_output": true + }, + "qwen.qwen3-coder-30b-a3b-v1:0": { + "input_cost_per_token": 1.5e-07, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 262144, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 6e-07, + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true, + "supports_native_structured_output": true + }, + "qwen.qwen3-32b-v1:0": { + "input_cost_per_token": 1.5e-07, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 131072, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 6e-07, + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true, + "supports_native_structured_output": true + }, + "qwen.qwen3-next-80b-a3b": { + "input_cost_per_token": 1.5e-07, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 128000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 1.2e-06, + "supports_function_calling": true, + "supports_system_messages": 
true, + "supports_native_structured_output": true + }, + "qwen.qwen3-vl-235b-a22b": { + "input_cost_per_token": 5.3e-07, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 128000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 2.66e-06, + "supports_function_calling": true, + "supports_system_messages": true, + "supports_vision": true, + "supports_native_structured_output": true + }, + "qwen.qwen3-coder-next": { + "input_cost_per_token": 5e-07, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 262144, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 1.2e-06, + "supports_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "source": "https://aws.amazon.com/bedrock/pricing/" + }, + "recraft/recraftv2": { + "litellm_provider": "recraft", + "mode": "image_generation", + "output_cost_per_image": 0.022, + "source": "https://www.recraft.ai/docs#pricing", + "supported_endpoints": [ + "/v1/images/generations" + ] + }, + "recraft/recraftv3": { + "litellm_provider": "recraft", + "mode": "image_generation", + "output_cost_per_image": 0.04, + "source": "https://www.recraft.ai/docs#pricing", + "supported_endpoints": [ + "/v1/images/generations" + ] + }, + "replicate/meta/llama-2-13b": { + "input_cost_per_token": 1e-07, + "litellm_provider": "replicate", + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 5e-07, + "supports_tool_choice": true + }, + "replicate/meta/llama-2-13b-chat": { + "input_cost_per_token": 1e-07, + "litellm_provider": "replicate", + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 5e-07, + "supports_tool_choice": true + }, + "replicate/meta/llama-2-70b": { + "input_cost_per_token": 6.5e-07, + "litellm_provider": "replicate", + "max_input_tokens": 4096, + 
"max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 2.75e-06, + "supports_tool_choice": true + }, + "replicate/meta/llama-2-70b-chat": { + "input_cost_per_token": 6.5e-07, + "litellm_provider": "replicate", + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 2.75e-06, + "supports_tool_choice": true + }, + "replicate/meta/llama-2-7b": { + "input_cost_per_token": 5e-08, + "litellm_provider": "replicate", + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 2.5e-07, + "supports_tool_choice": true + }, + "replicate/meta/llama-2-7b-chat": { + "input_cost_per_token": 5e-08, + "litellm_provider": "replicate", + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 2.5e-07, + "supports_tool_choice": true + }, + "replicate/meta/llama-3-70b": { + "input_cost_per_token": 6.5e-07, + "litellm_provider": "replicate", + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 2.75e-06, + "supports_tool_choice": true + }, + "replicate/meta/llama-3-70b-instruct": { + "input_cost_per_token": 6.5e-07, + "litellm_provider": "replicate", + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 2.75e-06, + "supports_tool_choice": true + }, + "replicate/meta/llama-3-8b": { + "input_cost_per_token": 5e-08, + "litellm_provider": "replicate", + "max_input_tokens": 8086, + "max_output_tokens": 8086, + "max_tokens": 8086, + "mode": "chat", + "output_cost_per_token": 2.5e-07, + "supports_tool_choice": true + }, + "replicate/meta/llama-3-8b-instruct": { + "input_cost_per_token": 5e-08, + "litellm_provider": "replicate", + "max_input_tokens": 8086, + "max_output_tokens": 8086, + "max_tokens": 8086, + "mode": "chat", + 
"output_cost_per_token": 2.5e-07, + "supports_tool_choice": true + }, + "replicate/mistralai/mistral-7b-instruct-v0.2": { + "input_cost_per_token": 5e-08, + "litellm_provider": "replicate", + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 2.5e-07, + "supports_tool_choice": true + }, + "replicate/mistralai/mistral-7b-v0.1": { + "input_cost_per_token": 5e-08, + "litellm_provider": "replicate", + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 2.5e-07, + "supports_tool_choice": true + }, + "replicate/mistralai/mixtral-8x7b-instruct-v0.1": { + "input_cost_per_token": 3e-07, + "litellm_provider": "replicate", + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 1e-06, + "supports_tool_choice": true + }, + "replicate/openai/gpt-5": { + "input_cost_per_token": 1.25e-06, + "output_cost_per_token": 1e-05, + "litellm_provider": "replicate", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_response_schema": true + }, + "replicateopenai/gpt-oss-20b": { + "input_cost_per_token": 9e-08, + "output_cost_per_token": 3.6e-07, + "litellm_provider": "replicate", + "mode": "chat", + "supports_function_calling": true, + "supports_system_messages": true + }, + "replicate/anthropic/claude-4.5-haiku": { + "input_cost_per_token": 1e-06, + "output_cost_per_token": 5e-06, + "litellm_provider": "replicate", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_response_schema": true, + "supports_prompt_caching": true + }, + 
"replicate/ibm-granite/granite-3.3-8b-instruct": { + "input_cost_per_token": 3e-08, + "output_cost_per_token": 2.5e-07, + "litellm_provider": "replicate", + "mode": "chat", + "supports_function_calling": true, + "supports_system_messages": true + }, + "replicate/openai/gpt-4o": { + "input_cost_per_token": 2.5e-06, + "output_cost_per_token": 1e-05, + "litellm_provider": "replicate", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_response_schema": true, + "supports_audio_input": true, + "supports_audio_output": true + }, + "replicate/openai/o4-mini": { + "input_cost_per_token": 1e-06, + "output_cost_per_token": 4e-06, + "output_cost_per_reasoning_token": 4e-06, + "litellm_provider": "replicate", + "mode": "chat", + "supports_reasoning": true, + "supports_system_messages": true + }, + "replicate/openai/o1-mini": { + "input_cost_per_token": 1.1e-06, + "output_cost_per_token": 4.4e-06, + "output_cost_per_reasoning_token": 4.4e-06, + "litellm_provider": "replicate", + "mode": "chat", + "supports_reasoning": true, + "supports_system_messages": true + }, + "replicate/openai/o1": { + "input_cost_per_token": 1.5e-05, + "output_cost_per_token": 6e-05, + "output_cost_per_reasoning_token": 6e-05, + "litellm_provider": "replicate", + "mode": "chat", + "supports_reasoning": true, + "supports_system_messages": true + }, + "replicate/openai/gpt-4o-mini": { + "input_cost_per_token": 1.5e-07, + "output_cost_per_token": 6e-07, + "litellm_provider": "replicate", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_response_schema": true + }, + "replicate/qwen/qwen3-235b-a22b-instruct-2507": { + "input_cost_per_token": 2.64e-07, + "output_cost_per_token": 1.06e-06, + 
"litellm_provider": "replicate", + "mode": "chat", + "supports_function_calling": true, + "supports_system_messages": true + }, + "replicate/anthropic/claude-4-sonnet": { + "input_cost_per_token": 3e-06, + "output_cost_per_token": 1.5e-05, + "litellm_provider": "replicate", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_response_schema": true, + "supports_prompt_caching": true + }, + "replicate/deepseek-ai/deepseek-v3": { + "input_cost_per_token": 1.45e-06, + "output_cost_per_token": 1.45e-06, + "litellm_provider": "replicate", + "mode": "chat", + "max_input_tokens": 65536, + "max_output_tokens": 8192, + "max_tokens": 8192, + "supports_function_calling": true, + "supports_system_messages": true + }, + "replicate/anthropic/claude-3.7-sonnet": { + "input_cost_per_token": 3e-06, + "output_cost_per_token": 1.5e-05, + "litellm_provider": "replicate", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_response_schema": true, + "supports_prompt_caching": true + }, + "replicate/anthropic/claude-3.5-haiku": { + "input_cost_per_token": 1e-06, + "output_cost_per_token": 5e-06, + "litellm_provider": "replicate", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_response_schema": true, + "supports_prompt_caching": true + }, + "replicate/anthropic/claude-3.5-sonnet": { + "input_cost_per_token": 3.75e-06, + "output_cost_per_token": 1.875e-05, + "litellm_provider": "replicate", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": true, + 
"supports_system_messages": true, + "supports_tool_choice": true, + "supports_response_schema": true, + "supports_prompt_caching": true + }, + "replicate/google/gemini-3-pro": { + "input_cost_per_token": 2e-06, + "output_cost_per_token": 1.2e-05, + "litellm_provider": "replicate", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_response_schema": true + }, + "replicate/anthropic/claude-4.5-sonnet": { + "input_cost_per_token": 3e-06, + "output_cost_per_token": 1.5e-05, + "litellm_provider": "replicate", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_response_schema": true, + "supports_prompt_caching": true + }, + "replicate/openai/gpt-4.1": { + "input_cost_per_token": 2e-06, + "output_cost_per_token": 8e-06, + "litellm_provider": "replicate", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_response_schema": true + }, + "replicate/openai/gpt-4.1-nano": { + "input_cost_per_token": 1e-07, + "output_cost_per_token": 4e-07, + "litellm_provider": "replicate", + "mode": "chat", + "supports_function_calling": true, + "supports_system_messages": true + }, + "replicate/openai/gpt-4.1-mini": { + "input_cost_per_token": 4e-07, + "output_cost_per_token": 1.6e-06, + "litellm_provider": "replicate", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_response_schema": true + }, + "replicate/openai/gpt-5-nano": { + "input_cost_per_token": 5e-08, + 
"output_cost_per_token": 4e-07, + "litellm_provider": "replicate", + "mode": "chat", + "supports_function_calling": true, + "supports_system_messages": true + }, + "replicate/openai/gpt-5-mini": { + "input_cost_per_token": 2.5e-07, + "output_cost_per_token": 2e-06, + "litellm_provider": "replicate", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_response_schema": true + }, + "replicate/google/gemini-2.5-flash": { + "input_cost_per_token": 2.5e-06, + "output_cost_per_token": 2.5e-06, + "litellm_provider": "replicate", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_response_schema": true + }, + "replicate/openai/gpt-oss-120b": { + "input_cost_per_token": 1.8e-07, + "output_cost_per_token": 7.2e-07, + "litellm_provider": "replicate", + "mode": "chat", + "supports_function_calling": true, + "supports_system_messages": true + }, + "replicate/deepseek-ai/deepseek-v3.1": { + "input_cost_per_token": 6.72e-07, + "output_cost_per_token": 2.016e-06, + "litellm_provider": "replicate", + "mode": "chat", + "max_input_tokens": 163840, + "max_output_tokens": 163840, + "max_tokens": 163840, + "supports_function_calling": true, + "supports_reasoning": true, + "supports_system_messages": true + }, + "replicate/xai/grok-4": { + "input_cost_per_token": 7.2e-06, + "output_cost_per_token": 3.6e-05, + "litellm_provider": "replicate", + "mode": "chat", + "supports_function_calling": true, + "supports_system_messages": true + }, + "replicate/deepseek-ai/deepseek-r1": { + "input_cost_per_token": 3.75e-06, + "output_cost_per_token": 1e-05, + "output_cost_per_reasoning_token": 1e-05, + "litellm_provider": "replicate", + "mode": "chat", + "max_input_tokens": 65536, + 
"max_output_tokens": 8192, + "max_tokens": 8192, + "supports_reasoning": true, + "supports_system_messages": true + }, + "rerank-english-v2.0": { + "input_cost_per_query": 0.002, + "input_cost_per_token": 0.0, + "litellm_provider": "cohere", + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_query_tokens": 2048, + "max_tokens": 4096, + "mode": "rerank", + "output_cost_per_token": 0.0 + }, + "rerank-english-v3.0": { + "input_cost_per_query": 0.002, + "input_cost_per_token": 0.0, + "litellm_provider": "cohere", + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_query_tokens": 2048, + "max_tokens": 4096, + "mode": "rerank", + "output_cost_per_token": 0.0 + }, + "rerank-multilingual-v2.0": { + "input_cost_per_query": 0.002, + "input_cost_per_token": 0.0, + "litellm_provider": "cohere", + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_query_tokens": 2048, + "max_tokens": 4096, + "mode": "rerank", + "output_cost_per_token": 0.0 + }, + "rerank-multilingual-v3.0": { + "input_cost_per_query": 0.002, + "input_cost_per_token": 0.0, + "litellm_provider": "cohere", + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_query_tokens": 2048, + "max_tokens": 4096, + "mode": "rerank", + "output_cost_per_token": 0.0 + }, + "rerank-v3.5": { + "input_cost_per_query": 0.002, + "input_cost_per_token": 0.0, + "litellm_provider": "cohere", + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_query_tokens": 2048, + "max_tokens": 4096, + "mode": "rerank", + "output_cost_per_token": 0.0 + }, + "nvidia_nim/nvidia/nv-rerankqa-mistral-4b-v3": { + "input_cost_per_query": 0.0, + "input_cost_per_token": 0.0, + "litellm_provider": "nvidia_nim", + "mode": "rerank", + "output_cost_per_token": 0.0 + }, + "nvidia_nim/nvidia/llama-3_2-nv-rerankqa-1b-v2": { + "input_cost_per_query": 0.0, + "input_cost_per_token": 0.0, + "litellm_provider": "nvidia_nim", + "mode": "rerank", + "output_cost_per_token": 0.0 + }, + 
"nvidia_nim/ranking/nvidia/llama-3.2-nv-rerankqa-1b-v2": { + "input_cost_per_query": 0.0, + "input_cost_per_token": 0.0, + "litellm_provider": "nvidia_nim", + "mode": "rerank", + "output_cost_per_token": 0.0 + }, + "sagemaker/meta-textgeneration-llama-2-13b": { + "input_cost_per_token": 0.0, + "litellm_provider": "sagemaker", + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "completion", + "output_cost_per_token": 0.0 + }, + "sagemaker/meta-textgeneration-llama-2-13b-f": { + "input_cost_per_token": 0.0, + "litellm_provider": "sagemaker", + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 0.0 + }, + "sagemaker/meta-textgeneration-llama-2-70b": { + "input_cost_per_token": 0.0, + "litellm_provider": "sagemaker", + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "completion", + "output_cost_per_token": 0.0 + }, + "sagemaker/meta-textgeneration-llama-2-70b-b-f": { + "input_cost_per_token": 0.0, + "litellm_provider": "sagemaker", + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 0.0 + }, + "sagemaker/meta-textgeneration-llama-2-7b": { + "input_cost_per_token": 0.0, + "litellm_provider": "sagemaker", + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "completion", + "output_cost_per_token": 0.0 + }, + "sagemaker/meta-textgeneration-llama-2-7b-f": { + "input_cost_per_token": 0.0, + "litellm_provider": "sagemaker", + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 0.0 + }, + "sambanova/MiniMax-M2.7": { + "input_cost_per_token": 3e-07, + "litellm_provider": "sambanova", + "max_input_tokens": 204800, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 1.2e-06, + "source": 
"https://cloud.sambanova.ai/plans/pricing", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, + "sambanova/DeepSeek-R1": { + "input_cost_per_token": 5e-06, + "litellm_provider": "sambanova", + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 7e-06, + "source": "https://cloud.sambanova.ai/plans/pricing" + }, + "sambanova/DeepSeek-R1-Distill-Llama-70B": { + "input_cost_per_token": 7e-07, + "litellm_provider": "sambanova", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 1.4e-06, + "source": "https://cloud.sambanova.ai/plans/pricing" + }, + "sambanova/DeepSeek-V3-0324": { + "input_cost_per_token": 3e-06, + "litellm_provider": "sambanova", + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 4.5e-06, + "source": "https://cloud.sambanova.ai/plans/pricing", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, + "sambanova/Llama-4-Maverick-17B-128E-Instruct": { + "input_cost_per_token": 6.3e-07, + "litellm_provider": "sambanova", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "metadata": { + "notes": "For vision models, images are converted to 6432 input tokens and are billed at that amount" + }, + "mode": "chat", + "output_cost_per_token": 1.8e-06, + "source": "https://cloud.sambanova.ai/plans/pricing", + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "sambanova/Llama-4-Scout-17B-16E-Instruct": { + "input_cost_per_token": 4e-07, + "litellm_provider": "sambanova", + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "max_tokens": 8192, + "metadata": { + "notes": "For vision models, images are converted to 6432 
input tokens and are billed at that amount" + }, + "mode": "chat", + "output_cost_per_token": 7e-07, + "source": "https://cloud.sambanova.ai/plans/pricing", + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "sambanova/Meta-Llama-3.1-405B-Instruct": { + "input_cost_per_token": 5e-06, + "litellm_provider": "sambanova", + "max_input_tokens": 16384, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 1e-05, + "source": "https://cloud.sambanova.ai/plans/pricing", + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "sambanova/Meta-Llama-3.1-8B-Instruct": { + "input_cost_per_token": 1e-07, + "litellm_provider": "sambanova", + "max_input_tokens": 16384, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 2e-07, + "source": "https://cloud.sambanova.ai/plans/pricing", + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "sambanova/Meta-Llama-3.2-1B-Instruct": { + "input_cost_per_token": 4e-08, + "litellm_provider": "sambanova", + "max_input_tokens": 16384, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 8e-08, + "source": "https://cloud.sambanova.ai/plans/pricing" + }, + "sambanova/Meta-Llama-3.2-3B-Instruct": { + "input_cost_per_token": 8e-08, + "litellm_provider": "sambanova", + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 1.6e-07, + "source": "https://cloud.sambanova.ai/plans/pricing" + }, + "sambanova/Meta-Llama-3.3-70B-Instruct": { + "input_cost_per_token": 6e-07, + "litellm_provider": "sambanova", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 1.2e-06, + "source": 
"https://cloud.sambanova.ai/plans/pricing", + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "sambanova/Meta-Llama-Guard-3-8B": { + "input_cost_per_token": 3e-07, + "litellm_provider": "sambanova", + "max_input_tokens": 16384, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 3e-07, + "source": "https://cloud.sambanova.ai/plans/pricing" + }, + "sambanova/QwQ-32B": { + "input_cost_per_token": 5e-07, + "litellm_provider": "sambanova", + "max_input_tokens": 16384, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 1e-06, + "source": "https://cloud.sambanova.ai/plans/pricing" + }, + "sambanova/Qwen2-Audio-7B-Instruct": { + "input_cost_per_token": 5e-07, + "litellm_provider": "sambanova", + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 0.0001, + "source": "https://cloud.sambanova.ai/plans/pricing", + "supports_audio_input": true + }, + "sambanova/Qwen3-32B": { + "input_cost_per_token": 4e-07, + "litellm_provider": "sambanova", + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 8e-07, + "source": "https://cloud.sambanova.ai/plans/pricing", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, + "sambanova/DeepSeek-V3.1": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "input_cost_per_token": 3e-06, + "output_cost_per_token": 4.5e-06, + "litellm_provider": "sambanova", + "mode": "chat", + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_reasoning": true, + "source": "https://cloud.sambanova.ai/plans/pricing" + }, + "sambanova/gpt-oss-120b": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 
3e-06, + "output_cost_per_token": 4.5e-06, + "litellm_provider": "sambanova", + "mode": "chat", + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_reasoning": true, + "source": "https://cloud.sambanova.ai/plans/pricing" + }, + "snowflake/claude-3-5-sonnet": { + "litellm_provider": "snowflake", + "max_input_tokens": 18000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "supports_computer_use": true + }, + "snowflake/deepseek-r1": { + "litellm_provider": "snowflake", + "max_input_tokens": 32768, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "supports_reasoning": true + }, + "snowflake/gemma-7b": { + "litellm_provider": "snowflake", + "max_input_tokens": 8000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat" + }, + "snowflake/jamba-1.5-large": { + "litellm_provider": "snowflake", + "max_input_tokens": 256000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat" + }, + "snowflake/jamba-1.5-mini": { + "litellm_provider": "snowflake", + "max_input_tokens": 256000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat" + }, + "snowflake/jamba-instruct": { + "litellm_provider": "snowflake", + "max_input_tokens": 256000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat" + }, + "snowflake/llama2-70b-chat": { + "litellm_provider": "snowflake", + "max_input_tokens": 4096, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat" + }, + "snowflake/llama3-70b": { + "litellm_provider": "snowflake", + "max_input_tokens": 8000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat" + }, + "snowflake/llama3-8b": { + "litellm_provider": "snowflake", + "max_input_tokens": 8000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat" + }, + "snowflake/llama3.1-405b": { + "litellm_provider": "snowflake", + "max_input_tokens": 128000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat" + }, + 
"snowflake/llama3.1-70b": { + "litellm_provider": "snowflake", + "max_input_tokens": 128000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat" + }, + "snowflake/llama3.1-8b": { + "litellm_provider": "snowflake", + "max_input_tokens": 128000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat" + }, + "snowflake/llama3.2-1b": { + "litellm_provider": "snowflake", + "max_input_tokens": 128000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat" + }, + "snowflake/llama3.2-3b": { + "litellm_provider": "snowflake", + "max_input_tokens": 128000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat" + }, + "snowflake/llama3.3-70b": { + "litellm_provider": "snowflake", + "max_input_tokens": 128000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat" + }, + "snowflake/mistral-7b": { + "litellm_provider": "snowflake", + "max_input_tokens": 32000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat" + }, + "snowflake/mistral-large": { + "litellm_provider": "snowflake", + "max_input_tokens": 32000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat" + }, + "snowflake/mistral-large2": { + "litellm_provider": "snowflake", + "max_input_tokens": 128000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat" + }, + "snowflake/mixtral-8x7b": { + "litellm_provider": "snowflake", + "max_input_tokens": 32000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat" + }, + "snowflake/reka-core": { + "litellm_provider": "snowflake", + "max_input_tokens": 32000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat" + }, + "snowflake/reka-flash": { + "litellm_provider": "snowflake", + "max_input_tokens": 100000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat" + }, + "snowflake/snowflake-arctic": { + "litellm_provider": "snowflake", + "max_input_tokens": 4096, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat" 
+ }, + "snowflake/snowflake-llama-3.1-405b": { + "litellm_provider": "snowflake", + "max_input_tokens": 8000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat" + }, + "snowflake/snowflake-llama-3.3-70b": { + "litellm_provider": "snowflake", + "max_input_tokens": 8000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat" + }, + "stability/sd3": { + "litellm_provider": "stability", + "mode": "image_generation", + "output_cost_per_image": 0.065, + "supported_endpoints": [ + "/v1/images/generations" + ] + }, + "stability/sd3-large": { + "litellm_provider": "stability", + "mode": "image_generation", + "output_cost_per_image": 0.065, + "supported_endpoints": [ + "/v1/images/generations" + ] + }, + "stability/sd3-large-turbo": { + "litellm_provider": "stability", + "mode": "image_generation", + "output_cost_per_image": 0.04, + "supported_endpoints": [ + "/v1/images/generations" + ] + }, + "stability/sd3-medium": { + "litellm_provider": "stability", + "mode": "image_generation", + "output_cost_per_image": 0.035, + "supported_endpoints": [ + "/v1/images/generations" + ] + }, + "stability/sd3.5-large": { + "litellm_provider": "stability", + "mode": "image_generation", + "output_cost_per_image": 0.065, + "supported_endpoints": [ + "/v1/images/generations" + ] + }, + "stability/sd3.5-large-turbo": { + "litellm_provider": "stability", + "mode": "image_generation", + "output_cost_per_image": 0.04, + "supported_endpoints": [ + "/v1/images/generations" + ] + }, + "stability/sd3.5-medium": { + "litellm_provider": "stability", + "mode": "image_generation", + "output_cost_per_image": 0.035, + "supported_endpoints": [ + "/v1/images/generations" + ] + }, + "stability/stable-image-ultra": { + "litellm_provider": "stability", + "mode": "image_generation", + "output_cost_per_image": 0.08, + "supported_endpoints": [ + "/v1/images/generations" + ] + }, + "stability/inpaint": { + "litellm_provider": "stability", + "mode": "image_edit", + 
"output_cost_per_image": 0.005, + "supported_endpoints": [ + "/v1/images/edits" + ] + }, + "stability/outpaint": { + "litellm_provider": "stability", + "mode": "image_edit", + "output_cost_per_image": 0.004, + "supported_endpoints": [ + "/v1/images/edits" + ] + }, + "stability/erase": { + "litellm_provider": "stability", + "mode": "image_edit", + "output_cost_per_image": 0.005, + "supported_endpoints": [ + "/v1/images/edits" + ] + }, + "stability/search-and-replace": { + "litellm_provider": "stability", + "mode": "image_edit", + "output_cost_per_image": 0.005, + "supported_endpoints": [ + "/v1/images/edits" + ] + }, + "stability/search-and-recolor": { + "litellm_provider": "stability", + "mode": "image_edit", + "output_cost_per_image": 0.005, + "supported_endpoints": [ + "/v1/images/edits" + ] + }, + "stability/remove-background": { + "litellm_provider": "stability", + "mode": "image_edit", + "output_cost_per_image": 0.005, + "supported_endpoints": [ + "/v1/images/edits" + ] + }, + "stability/replace-background-and-relight": { + "litellm_provider": "stability", + "mode": "image_edit", + "output_cost_per_image": 0.008, + "supported_endpoints": [ + "/v1/images/edits" + ] + }, + "stability/sketch": { + "litellm_provider": "stability", + "mode": "image_edit", + "output_cost_per_image": 0.005, + "supported_endpoints": [ + "/v1/images/edits" + ] + }, + "stability/structure": { + "litellm_provider": "stability", + "mode": "image_edit", + "output_cost_per_image": 0.005, + "supported_endpoints": [ + "/v1/images/edits" + ] + }, + "stability/style": { + "litellm_provider": "stability", + "mode": "image_edit", + "output_cost_per_image": 0.005, + "supported_endpoints": [ + "/v1/images/edits" + ] + }, + "stability/style-transfer": { + "litellm_provider": "stability", + "mode": "image_edit", + "output_cost_per_image": 0.008, + "supported_endpoints": [ + "/v1/images/edits" + ] + }, + "stability/fast": { + "litellm_provider": "stability", + "mode": "image_edit", + 
"output_cost_per_image": 0.002, + "supported_endpoints": [ + "/v1/images/edits" + ] + }, + "stability/conservative": { + "litellm_provider": "stability", + "mode": "image_edit", + "output_cost_per_image": 0.04, + "supported_endpoints": [ + "/v1/images/edits" + ] + }, + "stability/creative": { + "litellm_provider": "stability", + "mode": "image_edit", + "output_cost_per_image": 0.06, + "supported_endpoints": [ + "/v1/images/edits" + ] + }, + "stability/stable-image-core": { + "litellm_provider": "stability", + "mode": "image_generation", + "output_cost_per_image": 0.03, + "supported_endpoints": [ + "/v1/images/generations" + ] + }, + "stability.sd3-5-large-v1:0": { + "litellm_provider": "bedrock", + "max_input_tokens": 77, + "max_tokens": 77, + "mode": "image_generation", + "output_cost_per_image": 0.08 + }, + "stability.sd3-large-v1:0": { + "litellm_provider": "bedrock", + "max_input_tokens": 77, + "max_tokens": 77, + "mode": "image_generation", + "output_cost_per_image": 0.08 + }, + "stability.stable-image-core-v1:0": { + "litellm_provider": "bedrock", + "max_input_tokens": 77, + "max_tokens": 77, + "mode": "image_generation", + "output_cost_per_image": 0.04 + }, + "stability.stable-conservative-upscale-v1:0": { + "litellm_provider": "bedrock", + "max_input_tokens": 77, + "mode": "image_edit", + "output_cost_per_image": 0.4 + }, + "stability.stable-creative-upscale-v1:0": { + "litellm_provider": "bedrock", + "max_input_tokens": 77, + "mode": "image_edit", + "output_cost_per_image": 0.6 + }, + "stability.stable-fast-upscale-v1:0": { + "litellm_provider": "bedrock", + "max_input_tokens": 77, + "mode": "image_edit", + "output_cost_per_image": 0.03 + }, + "stability.stable-outpaint-v1:0": { + "litellm_provider": "bedrock", + "max_input_tokens": 77, + "mode": "image_edit", + "output_cost_per_image": 0.06 + }, + "stability.stable-image-control-sketch-v1:0": { + "litellm_provider": "bedrock", + "max_input_tokens": 77, + "mode": "image_edit", + "output_cost_per_image": 
0.07 + }, + "stability.stable-image-control-structure-v1:0": { + "litellm_provider": "bedrock", + "max_input_tokens": 77, + "mode": "image_edit", + "output_cost_per_image": 0.07 + }, + "stability.stable-image-erase-object-v1:0": { + "litellm_provider": "bedrock", + "max_input_tokens": 77, + "mode": "image_edit", + "output_cost_per_image": 0.07 + }, + "stability.stable-image-inpaint-v1:0": { + "litellm_provider": "bedrock", + "max_input_tokens": 77, + "mode": "image_edit", + "output_cost_per_image": 0.07 + }, + "stability.stable-image-remove-background-v1:0": { + "litellm_provider": "bedrock", + "max_input_tokens": 77, + "mode": "image_edit", + "output_cost_per_image": 0.07 + }, + "stability.stable-image-search-recolor-v1:0": { + "litellm_provider": "bedrock", + "max_input_tokens": 77, + "mode": "image_edit", + "output_cost_per_image": 0.07 + }, + "stability.stable-image-search-replace-v1:0": { + "litellm_provider": "bedrock", + "max_input_tokens": 77, + "mode": "image_edit", + "output_cost_per_image": 0.07 + }, + "stability.stable-image-style-guide-v1:0": { + "litellm_provider": "bedrock", + "max_input_tokens": 77, + "mode": "image_edit", + "output_cost_per_image": 0.07 + }, + "stability.stable-style-transfer-v1:0": { + "litellm_provider": "bedrock", + "max_input_tokens": 77, + "mode": "image_edit", + "output_cost_per_image": 0.08 + }, + "stability.stable-image-core-v1:1": { + "litellm_provider": "bedrock", + "max_input_tokens": 77, + "max_tokens": 77, + "mode": "image_generation", + "output_cost_per_image": 0.04 + }, + "stability.stable-image-ultra-v1:0": { + "litellm_provider": "bedrock", + "max_input_tokens": 77, + "max_tokens": 77, + "mode": "image_generation", + "output_cost_per_image": 0.14 + }, + "stability.stable-image-ultra-v1:1": { + "litellm_provider": "bedrock", + "max_input_tokens": 77, + "max_tokens": 77, + "mode": "image_generation", + "output_cost_per_image": 0.14 + }, + "standard/1024-x-1024/dall-e-3": { + "input_cost_per_pixel": 3.81469e-08, + 
"litellm_provider": "openai", + "mode": "image_generation", + "output_cost_per_pixel": 0.0 + }, + "standard/1024-x-1792/dall-e-3": { + "input_cost_per_pixel": 4.359e-08, + "litellm_provider": "openai", + "mode": "image_generation", + "output_cost_per_pixel": 0.0 + }, + "standard/1792-x-1024/dall-e-3": { + "input_cost_per_pixel": 4.359e-08, + "litellm_provider": "openai", + "mode": "image_generation", + "output_cost_per_pixel": 0.0 + }, + "linkup/search": { + "input_cost_per_query": 0.00587, + "litellm_provider": "linkup", + "mode": "search" + }, + "linkup/search-deep": { + "input_cost_per_query": 0.05867, + "litellm_provider": "linkup", + "mode": "search" + }, + "tavily/search": { + "input_cost_per_query": 0.008, + "litellm_provider": "tavily", + "mode": "search" + }, + "tavily/search-advanced": { + "input_cost_per_query": 0.016, + "litellm_provider": "tavily", + "mode": "search" + }, + "text-completion-codestral/codestral-2405": { + "input_cost_per_token": 0.0, + "litellm_provider": "text-completion-codestral", + "max_input_tokens": 32000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "completion", + "output_cost_per_token": 0.0, + "source": "https://docs.mistral.ai/capabilities/code_generation/" + }, + "text-completion-codestral/codestral-latest": { + "input_cost_per_token": 0.0, + "litellm_provider": "text-completion-codestral", + "max_input_tokens": 32000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "completion", + "output_cost_per_token": 0.0, + "source": "https://docs.mistral.ai/capabilities/code_generation/" + }, + "text-embedding-004": { + "deprecation_date": "2026-01-14", + "input_cost_per_character": 2.5e-08, + "input_cost_per_token": 1e-07, + "litellm_provider": "vertex_ai-embedding-models", + "max_input_tokens": 2048, + "max_tokens": 2048, + "mode": "embedding", + "output_cost_per_token": 0, + "output_vector_size": 768, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models" + }, + 
"text-embedding-005": { + "input_cost_per_character": 2.5e-08, + "input_cost_per_token": 1e-07, + "litellm_provider": "vertex_ai-embedding-models", + "max_input_tokens": 2048, + "max_tokens": 2048, + "mode": "embedding", + "output_cost_per_token": 0, + "output_vector_size": 768, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models" + }, + "text-embedding-3-large": { + "input_cost_per_token": 1.3e-07, + "input_cost_per_token_batches": 6.5e-08, + "litellm_provider": "openai", + "max_input_tokens": 8191, + "max_tokens": 8191, + "mode": "embedding", + "output_cost_per_token": 0.0, + "output_cost_per_token_batches": 0.0, + "output_vector_size": 3072 + }, + "text-embedding-3-small": { + "input_cost_per_token": 2e-08, + "input_cost_per_token_batches": 1e-08, + "litellm_provider": "openai", + "max_input_tokens": 8191, + "max_tokens": 8191, + "mode": "embedding", + "output_cost_per_token": 0.0, + "output_cost_per_token_batches": 0.0, + "output_vector_size": 1536 + }, + "text-embedding-ada-002": { + "input_cost_per_token": 1e-07, + "litellm_provider": "openai", + "max_input_tokens": 8191, + "max_tokens": 8191, + "mode": "embedding", + "output_cost_per_token": 0.0, + "output_vector_size": 1536 + }, + "text-embedding-ada-002-v2": { + "input_cost_per_token": 1e-07, + "input_cost_per_token_batches": 5e-08, + "litellm_provider": "openai", + "max_input_tokens": 8191, + "max_tokens": 8191, + "mode": "embedding", + "output_cost_per_token": 0.0, + "output_cost_per_token_batches": 0.0 + }, + "text-embedding-large-exp-03-07": { + "input_cost_per_character": 2.5e-08, + "input_cost_per_token": 1e-07, + "litellm_provider": "vertex_ai-embedding-models", + "max_input_tokens": 8192, + "max_tokens": 8192, + "mode": "embedding", + "output_cost_per_token": 0, + "output_vector_size": 3072, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models" + }, + "text-embedding-preview-0409": { + "input_cost_per_token": 6.25e-09, + 
"input_cost_per_token_batch_requests": 5e-09, + "litellm_provider": "vertex_ai-embedding-models", + "max_input_tokens": 3072, + "max_tokens": 3072, + "mode": "embedding", + "output_cost_per_token": 0, + "output_vector_size": 768, + "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing" + }, + "text-moderation-007": { + "input_cost_per_token": 0.0, + "litellm_provider": "openai", + "max_input_tokens": 32768, + "max_output_tokens": 0, + "max_tokens": 0, + "mode": "moderation", + "output_cost_per_token": 0.0 + }, + "text-moderation-latest": { + "input_cost_per_token": 0.0, + "litellm_provider": "openai", + "max_input_tokens": 32768, + "max_output_tokens": 0, + "max_tokens": 0, + "mode": "moderation", + "output_cost_per_token": 0.0 + }, + "text-moderation-stable": { + "input_cost_per_token": 0.0, + "litellm_provider": "openai", + "max_input_tokens": 32768, + "max_output_tokens": 0, + "max_tokens": 0, + "mode": "moderation", + "output_cost_per_token": 0.0 + }, + "text-multilingual-embedding-002": { + "input_cost_per_character": 2.5e-08, + "input_cost_per_token": 1e-07, + "litellm_provider": "vertex_ai-embedding-models", + "max_input_tokens": 2048, + "max_tokens": 2048, + "mode": "embedding", + "output_cost_per_token": 0, + "output_vector_size": 768, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models" + }, + "text-unicorn": { + "input_cost_per_token": 1e-05, + "litellm_provider": "vertex_ai-text-models", + "max_input_tokens": 8192, + "max_output_tokens": 1024, + "max_tokens": 1024, + "mode": "completion", + "output_cost_per_token": 2.8e-05, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, + "text-unicorn@001": { + "input_cost_per_token": 1e-05, + "litellm_provider": "vertex_ai-text-models", + "max_input_tokens": 8192, + "max_output_tokens": 1024, + "max_tokens": 1024, + "mode": "completion", + "output_cost_per_token": 2.8e-05, + "source": 
"https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, + "together-ai-21.1b-41b": { + "input_cost_per_token": 8e-07, + "litellm_provider": "together_ai", + "mode": "chat", + "output_cost_per_token": 8e-07 + }, + "together-ai-4.1b-8b": { + "input_cost_per_token": 2e-07, + "litellm_provider": "together_ai", + "mode": "chat", + "output_cost_per_token": 2e-07 + }, + "together-ai-41.1b-80b": { + "input_cost_per_token": 9e-07, + "litellm_provider": "together_ai", + "mode": "chat", + "output_cost_per_token": 9e-07 + }, + "together-ai-8.1b-21b": { + "input_cost_per_token": 3e-07, + "litellm_provider": "together_ai", + "max_tokens": 1000, + "mode": "chat", + "output_cost_per_token": 3e-07 + }, + "together-ai-81.1b-110b": { + "input_cost_per_token": 1.8e-06, + "litellm_provider": "together_ai", + "mode": "chat", + "output_cost_per_token": 1.8e-06 + }, + "together-ai-embedding-151m-to-350m": { + "input_cost_per_token": 1.6e-08, + "litellm_provider": "together_ai", + "mode": "embedding", + "output_cost_per_token": 0.0 + }, + "together-ai-embedding-up-to-150m": { + "input_cost_per_token": 8e-09, + "litellm_provider": "together_ai", + "mode": "embedding", + "output_cost_per_token": 0.0 + }, + "together_ai/baai/bge-base-en-v1.5": { + "input_cost_per_token": 8e-09, + "litellm_provider": "together_ai", + "max_input_tokens": 512, + "mode": "embedding", + "output_cost_per_token": 0.0, + "output_vector_size": 768 + }, + "together_ai/BAAI/bge-base-en-v1.5": { + "input_cost_per_token": 8e-09, + "litellm_provider": "together_ai", + "max_input_tokens": 512, + "mode": "embedding", + "output_cost_per_token": 0.0, + "output_vector_size": 768 + }, + "together-ai-up-to-4b": { + "input_cost_per_token": 1e-07, + "litellm_provider": "together_ai", + "mode": "chat", + "output_cost_per_token": 1e-07 + }, + "together_ai/Qwen/Qwen2.5-72B-Instruct-Turbo": { + "litellm_provider": "together_ai", + "mode": "chat", + "supports_function_calling": true, + 
"supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "together_ai/Qwen/Qwen2.5-7B-Instruct-Turbo": { + "litellm_provider": "together_ai", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "together_ai/Qwen/Qwen3-235B-A22B-Instruct-2507-tput": { + "input_cost_per_token": 2e-07, + "litellm_provider": "together_ai", + "max_input_tokens": 262000, + "mode": "chat", + "output_cost_per_token": 6e-06, + "source": "https://www.together.ai/models/qwen3-235b-a22b-instruct-2507-fp8", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "together_ai/Qwen/Qwen3-235B-A22B-Thinking-2507": { + "input_cost_per_token": 6.5e-07, + "litellm_provider": "together_ai", + "max_input_tokens": 256000, + "mode": "chat", + "output_cost_per_token": 3e-06, + "source": "https://www.together.ai/models/qwen3-235b-a22b-thinking-2507", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "together_ai/Qwen/Qwen3-235B-A22B-fp8-tput": { + "input_cost_per_token": 2e-07, + "litellm_provider": "together_ai", + "max_input_tokens": 40000, + "mode": "chat", + "output_cost_per_token": 6e-07, + "source": "https://www.together.ai/models/qwen3-235b-a22b-fp8-tput", + "supports_function_calling": false, + "supports_parallel_function_calling": false, + "supports_tool_choice": false + }, + "together_ai/Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8": { + "input_cost_per_token": 2e-06, + "litellm_provider": "together_ai", + "max_input_tokens": 256000, + "mode": "chat", + "output_cost_per_token": 2e-06, + "source": "https://www.together.ai/models/qwen3-coder-480b-a35b-instruct", + "supports_function_calling": true, + 
"supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "together_ai/deepseek-ai/DeepSeek-R1": { + "input_cost_per_token": 3e-06, + "litellm_provider": "together_ai", + "max_input_tokens": 128000, + "max_output_tokens": 20480, + "max_tokens": 20480, + "mode": "chat", + "output_cost_per_token": 7e-06, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "together_ai/deepseek-ai/DeepSeek-R1-0528-tput": { + "input_cost_per_token": 5.5e-07, + "litellm_provider": "together_ai", + "max_input_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 2.19e-06, + "source": "https://www.together.ai/models/deepseek-r1-0528-throughput", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "together_ai/deepseek-ai/DeepSeek-V3": { + "input_cost_per_token": 1.25e-06, + "litellm_provider": "together_ai", + "max_input_tokens": 65536, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 1.25e-06, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "together_ai/deepseek-ai/DeepSeek-V3.1": { + "input_cost_per_token": 6e-07, + "litellm_provider": "together_ai", + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 1.7e-06, + "source": "https://www.together.ai/models/deepseek-v3-1", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true, + "max_input_tokens": 128000, + "max_output_tokens": 16384 + }, + "together_ai/meta-llama/Llama-3.2-3B-Instruct-Turbo": { + "litellm_provider": "together_ai", + "mode": "chat", + "supports_function_calling": true, + 
"supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "together_ai/meta-llama/Llama-3.3-70B-Instruct-Turbo": { + "input_cost_per_token": 8.8e-07, + "litellm_provider": "together_ai", + "mode": "chat", + "output_cost_per_token": 8.8e-07, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "together_ai/meta-llama/Llama-3.3-70B-Instruct-Turbo-Free": { + "input_cost_per_token": 0, + "litellm_provider": "together_ai", + "mode": "chat", + "output_cost_per_token": 0, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "together_ai/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8": { + "input_cost_per_token": 2.7e-07, + "litellm_provider": "together_ai", + "mode": "chat", + "output_cost_per_token": 8.5e-07, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "together_ai/meta-llama/Llama-4-Scout-17B-16E-Instruct": { + "input_cost_per_token": 1.8e-07, + "litellm_provider": "together_ai", + "mode": "chat", + "output_cost_per_token": 5.9e-07, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "together_ai/meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo": { + "input_cost_per_token": 3.5e-06, + "litellm_provider": "together_ai", + "mode": "chat", + "output_cost_per_token": 3.5e-06, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "together_ai/meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo": { + "input_cost_per_token": 8.8e-07, + "litellm_provider": "together_ai", + "mode": "chat", + 
"output_cost_per_token": 8.8e-07, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "together_ai/meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo": { + "input_cost_per_token": 1.8e-07, + "litellm_provider": "together_ai", + "mode": "chat", + "output_cost_per_token": 1.8e-07, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "together_ai/mistralai/Mistral-7B-Instruct-v0.1": { + "litellm_provider": "together_ai", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "together_ai/mistralai/Mistral-Small-24B-Instruct-2501": { + "litellm_provider": "together_ai", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true + }, + "together_ai/mistralai/Mixtral-8x7B-Instruct-v0.1": { + "input_cost_per_token": 6e-07, + "litellm_provider": "together_ai", + "mode": "chat", + "output_cost_per_token": 6e-07, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "together_ai/moonshotai/Kimi-K2-Instruct": { + "input_cost_per_token": 1e-06, + "litellm_provider": "together_ai", + "mode": "chat", + "output_cost_per_token": 3e-06, + "source": "https://www.together.ai/models/kimi-k2-instruct", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "together_ai/openai/gpt-oss-120b": { + "input_cost_per_token": 1.5e-07, + "litellm_provider": "together_ai", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 6e-07, 
+ "source": "https://www.together.ai/models/gpt-oss-120b", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "together_ai/openai/gpt-oss-20b": { + "input_cost_per_token": 5e-08, + "litellm_provider": "together_ai", + "max_input_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 2e-07, + "source": "https://www.together.ai/models/gpt-oss-20b", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "together_ai/togethercomputer/CodeLlama-34b-Instruct": { + "litellm_provider": "together_ai", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true + }, + "together_ai/zai-org/GLM-4.5-Air-FP8": { + "input_cost_per_token": 2e-07, + "litellm_provider": "together_ai", + "max_input_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 1.1e-06, + "source": "https://www.together.ai/models/glm-4-5-air", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "together_ai/zai-org/GLM-4.6": { + "input_cost_per_token": 6e-07, + "litellm_provider": "together_ai", + "max_input_tokens": 200000, + "max_output_tokens": 200000, + "max_tokens": 200000, + "mode": "chat", + "output_cost_per_token": 2.2e-06, + "source": "https://www.together.ai/models/glm-4-6", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, + "together_ai/zai-org/GLM-4.7": { + "input_cost_per_token": 4.5e-07, + "litellm_provider": "together_ai", + "max_input_tokens": 200000, + "max_output_tokens": 200000, + "max_tokens": 200000, + "mode": "chat", + "output_cost_per_token": 2e-06, + "source": 
"https://www.together.ai/models/glm-4-7", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, + "together_ai/moonshotai/Kimi-K2.5": { + "input_cost_per_token": 5e-07, + "litellm_provider": "together_ai", + "max_input_tokens": 256000, + "max_output_tokens": 256000, + "max_tokens": 256000, + "mode": "chat", + "output_cost_per_token": 2.8e-06, + "source": "https://www.together.ai/models/kimi-k2-5", + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_reasoning": true + }, + "together_ai/moonshotai/Kimi-K2-Instruct-0905": { + "input_cost_per_token": 1e-06, + "litellm_provider": "together_ai", + "max_input_tokens": 262144, + "mode": "chat", + "output_cost_per_token": 3e-06, + "source": "https://www.together.ai/models/kimi-k2-0905", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true + }, + "together_ai/Qwen/Qwen3-Next-80B-A3B-Instruct": { + "input_cost_per_token": 1.5e-07, + "litellm_provider": "together_ai", + "max_input_tokens": 262144, + "mode": "chat", + "output_cost_per_token": 1.5e-06, + "source": "https://www.together.ai/models/qwen3-next-80b-a3b-instruct", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "together_ai/Qwen/Qwen3-Next-80B-A3B-Thinking": { + "input_cost_per_token": 1.5e-07, + "litellm_provider": "together_ai", + "max_input_tokens": 262144, + "mode": "chat", + "output_cost_per_token": 1.5e-06, + "source": "https://www.together.ai/models/qwen3-next-80b-a3b-thinking", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "together_ai/Qwen/Qwen3.5-397B-A17B": { + "input_cost_per_token": 6e-07, + "litellm_provider": "together_ai", + 
"max_input_tokens": 262144, + "mode": "chat", + "output_cost_per_token": 3.6e-06, + "source": "https://www.together.ai/models/Qwen/Qwen3.5-397B-A17B", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "tts-1": { + "input_cost_per_character": 1.5e-05, + "litellm_provider": "openai", + "mode": "audio_speech", + "supported_endpoints": [ + "/v1/audio/speech" + ] + }, + "tts-1-hd": { + "input_cost_per_character": 3e-05, + "litellm_provider": "openai", + "mode": "audio_speech", + "supported_endpoints": [ + "/v1/audio/speech" + ] + }, + "aws_polly/standard": { + "input_cost_per_character": 4e-06, + "litellm_provider": "aws_polly", + "mode": "audio_speech", + "supported_endpoints": [ + "/v1/audio/speech" + ], + "source": "https://aws.amazon.com/polly/pricing/" + }, + "aws_polly/neural": { + "input_cost_per_character": 1.6e-05, + "litellm_provider": "aws_polly", + "mode": "audio_speech", + "supported_endpoints": [ + "/v1/audio/speech" + ], + "source": "https://aws.amazon.com/polly/pricing/" + }, + "aws_polly/long-form": { + "input_cost_per_character": 0.0001, + "litellm_provider": "aws_polly", + "mode": "audio_speech", + "supported_endpoints": [ + "/v1/audio/speech" + ], + "source": "https://aws.amazon.com/polly/pricing/" + }, + "aws_polly/generative": { + "input_cost_per_character": 3e-05, + "litellm_provider": "aws_polly", + "mode": "audio_speech", + "supported_endpoints": [ + "/v1/audio/speech" + ], + "source": "https://aws.amazon.com/polly/pricing/" + }, + "us.amazon.nova-lite-v1:0": { + "input_cost_per_token": 6e-08, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 300000, + "max_output_tokens": 10000, + "max_tokens": 10000, + "mode": "chat", + "output_cost_per_token": 2.4e-07, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_vision": true + }, + 
"us.amazon.nova-micro-v1:0": { + "input_cost_per_token": 3.5e-08, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 128000, + "max_output_tokens": 10000, + "max_tokens": 10000, + "mode": "chat", + "output_cost_per_token": 1.4e-07, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_response_schema": true + }, + "us.amazon.nova-premier-v1:0": { + "input_cost_per_token": 2.5e-06, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 1000000, + "max_output_tokens": 10000, + "max_tokens": 10000, + "mode": "chat", + "output_cost_per_token": 1.25e-05, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": false, + "supports_response_schema": true, + "supports_vision": true + }, + "us.amazon.nova-pro-v1:0": { + "input_cost_per_token": 8e-07, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 300000, + "max_output_tokens": 10000, + "max_tokens": 10000, + "mode": "chat", + "output_cost_per_token": 3.2e-06, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_vision": true + }, + "us.anthropic.claude-3-5-haiku-20241022-v1:0": { + "cache_creation_input_token_cost": 1e-06, + "cache_read_input_token_cost": 8e-08, + "input_cost_per_token": 8e-07, + "litellm_provider": "bedrock", + "max_input_tokens": 200000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 4e-06, + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "us.anthropic.claude-haiku-4-5-20251001-v1:0": { + "cache_creation_input_token_cost": 1.375e-06, + "cache_creation_input_token_cost_above_1hr": 2.2e-06, + "cache_read_input_token_cost": 1.1e-07, + "input_cost_per_token": 1.1e-06, + "litellm_provider": 
"bedrock_converse", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 5.5e-06, + "source": "https://aws.amazon.com/about-aws/whats-new/2025/10/claude-4-5-haiku-anthropic-amazon-bedrock", + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 346, + "supports_native_structured_output": true + }, + "us.anthropic.claude-3-5-sonnet-20240620-v1:0": { + "input_cost_per_token": 3e-06, + "litellm_provider": "bedrock", + "max_input_tokens": 200000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 1.5e-05, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "cache_read_input_token_cost": 3e-07, + "cache_creation_input_token_cost": 3.75e-06 + }, + "us.anthropic.claude-3-5-sonnet-20241022-v2:0": { + "cache_creation_input_token_cost": 3.75e-06, + "cache_read_input_token_cost": 3e-07, + "input_cost_per_token": 3e-06, + "litellm_provider": "bedrock", + "max_input_tokens": 200000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 1.5e-05, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "us.anthropic.claude-3-7-sonnet-20250219-v1:0": { + "cache_creation_input_token_cost": 3.75e-06, + "cache_read_input_token_cost": 3e-07, + "input_cost_per_token": 3e-06, + "litellm_provider": "bedrock_converse", + 
"max_input_tokens": 200000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 1.5e-05, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "us.anthropic.claude-3-haiku-20240307-v1:0": { + "input_cost_per_token": 2.5e-07, + "litellm_provider": "bedrock", + "max_input_tokens": 200000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 1.25e-06, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "cache_read_input_token_cost": 2.5e-08, + "cache_creation_input_token_cost": 3.125e-07 + }, + "us.anthropic.claude-3-opus-20240229-v1:0": { + "input_cost_per_token": 1.5e-05, + "litellm_provider": "bedrock", + "max_input_tokens": 200000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 7.5e-05, + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "cache_read_input_token_cost": 1.5e-06, + "cache_creation_input_token_cost": 1.875e-05 + }, + "us.anthropic.claude-3-sonnet-20240229-v1:0": { + "input_cost_per_token": 3e-06, + "litellm_provider": "bedrock", + "max_input_tokens": 200000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 1.5e-05, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "cache_read_input_token_cost": 3e-07, + "cache_creation_input_token_cost": 3.75e-06 + }, + "us.anthropic.claude-opus-4-1-20250805-v1:0": { + 
"cache_creation_input_token_cost": 1.875e-05, + "cache_read_input_token_cost": 1.5e-06, + "input_cost_per_token": 1.5e-05, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 200000, + "max_output_tokens": 32000, + "max_tokens": 32000, + "mode": "chat", + "output_cost_per_token": 7.5e-05, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 159 + }, + "us.anthropic.claude-sonnet-4-5-20250929-v1:0": { + "cache_creation_input_token_cost": 4.125e-06, + "cache_creation_input_token_cost_above_1hr": 6.6e-06, + "cache_read_input_token_cost": 3.3e-07, + "input_cost_per_token": 3.3e-06, + "input_cost_per_token_above_200k_tokens": 6.6e-06, + "output_cost_per_token_above_200k_tokens": 2.475e-05, + "cache_creation_input_token_cost_above_200k_tokens": 8.25e-06, + "cache_creation_input_token_cost_above_1hr_above_200k_tokens": 1.32e-05, + "cache_read_input_token_cost_above_200k_tokens": 6.6e-07, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 1.65e-05, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 
346, + "supports_native_structured_output": true + }, + "us-gov.anthropic.claude-sonnet-4-5-20250929-v1:0": { + "cache_creation_input_token_cost": 4.125e-06, + "cache_read_input_token_cost": 3.3e-07, + "input_cost_per_token": 3.3e-06, + "input_cost_per_token_above_200k_tokens": 6.6e-06, + "output_cost_per_token_above_200k_tokens": 2.475e-05, + "cache_creation_input_token_cost_above_200k_tokens": 8.25e-06, + "cache_read_input_token_cost_above_200k_tokens": 6.6e-07, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 1.65e-05, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 346, + "supports_native_structured_output": true + }, + "au.anthropic.claude-haiku-4-5-20251001-v1:0": { + "cache_creation_input_token_cost": 1.375e-06, + "cache_read_input_token_cost": 1.1e-07, + "input_cost_per_token": 1.1e-06, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 5.5e-06, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 346, + "supports_native_structured_output": true + }, + "us.anthropic.claude-opus-4-20250514-v1:0": { + "cache_creation_input_token_cost": 1.875e-05, + "cache_read_input_token_cost": 1.5e-06, + "input_cost_per_token": 1.5e-05, + "litellm_provider": "bedrock_converse", + 
"max_input_tokens": 200000, + "max_output_tokens": 32000, + "max_tokens": 32000, + "mode": "chat", + "output_cost_per_token": 7.5e-05, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 159 + }, + "us.anthropic.claude-opus-4-5-20251101-v1:0": { + "cache_creation_input_token_cost": 6.875e-06, + "cache_creation_input_token_cost_above_1hr": 1.1e-05, + "cache_read_input_token_cost": 5.5e-07, + "input_cost_per_token": 5.5e-06, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 2.75e-05, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_minimal_reasoning_effort": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 159, + "supports_native_structured_output": true + }, + "global.anthropic.claude-opus-4-5-20251101-v1:0": { + "cache_creation_input_token_cost": 6.25e-06, + "cache_creation_input_token_cost_above_1hr": 1e-05, + "cache_read_input_token_cost": 5e-07, + "input_cost_per_token": 5e-06, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + 
"output_cost_per_token": 2.5e-05, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_minimal_reasoning_effort": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 159, + "supports_native_structured_output": true + }, + "eu.anthropic.claude-opus-4-5-20251101-v1:0": { + "cache_creation_input_token_cost": 6.25e-06, + "cache_read_input_token_cost": 5e-07, + "input_cost_per_token": 5e-06, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 2.5e-05, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_minimal_reasoning_effort": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 159, + "supports_native_structured_output": true + }, + "us.anthropic.claude-sonnet-4-20250514-v1:0": { + "cache_creation_input_token_cost": 3.75e-06, + "cache_read_input_token_cost": 3e-07, + "input_cost_per_token": 3e-06, + "input_cost_per_token_above_200k_tokens": 6e-06, + "output_cost_per_token_above_200k_tokens": 2.25e-05, + "cache_creation_input_token_cost_above_200k_tokens": 7.5e-06, + "cache_read_input_token_cost_above_200k_tokens": 6e-07, + "litellm_provider": "bedrock_converse", + 
"max_input_tokens": 1000000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 1.5e-05, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 159 + }, + "us.deepseek.r1-v1:0": { + "input_cost_per_token": 1.35e-06, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 5.4e-06, + "supports_function_calling": false, + "supports_reasoning": true, + "supports_tool_choice": false + }, + "us.deepseek.v3.2": { + "input_cost_per_token": 6.2e-07, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 163840, + "max_output_tokens": 163840, + "max_tokens": 163840, + "mode": "chat", + "output_cost_per_token": 1.85e-06, + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, + "eu.deepseek.v3.2": { + "input_cost_per_token": 7.4e-07, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 163840, + "max_output_tokens": 163840, + "max_tokens": 163840, + "mode": "chat", + "output_cost_per_token": 2.22e-06, + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, + "us.meta.llama3-1-405b-instruct-v1:0": { + "input_cost_per_token": 5.32e-06, + "litellm_provider": "bedrock", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 1.6e-05, + "supports_function_calling": true, + "supports_tool_choice": false + }, + 
"us.meta.llama3-1-70b-instruct-v1:0": { + "input_cost_per_token": 9.9e-07, + "litellm_provider": "bedrock", + "max_input_tokens": 128000, + "max_output_tokens": 2048, + "max_tokens": 2048, + "mode": "chat", + "output_cost_per_token": 9.9e-07, + "supports_function_calling": true, + "supports_tool_choice": false + }, + "us.meta.llama3-1-8b-instruct-v1:0": { + "input_cost_per_token": 2.2e-07, + "litellm_provider": "bedrock", + "max_input_tokens": 128000, + "max_output_tokens": 2048, + "max_tokens": 2048, + "mode": "chat", + "output_cost_per_token": 2.2e-07, + "supports_function_calling": true, + "supports_tool_choice": false + }, + "us.meta.llama3-2-11b-instruct-v1:0": { + "input_cost_per_token": 3.5e-07, + "litellm_provider": "bedrock", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 3.5e-07, + "supports_function_calling": true, + "supports_tool_choice": false, + "supports_vision": true + }, + "us.meta.llama3-2-1b-instruct-v1:0": { + "input_cost_per_token": 1e-07, + "litellm_provider": "bedrock", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 1e-07, + "supports_function_calling": true, + "supports_tool_choice": false + }, + "us.meta.llama3-2-3b-instruct-v1:0": { + "input_cost_per_token": 1.5e-07, + "litellm_provider": "bedrock", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 1.5e-07, + "supports_function_calling": true, + "supports_tool_choice": false + }, + "us.meta.llama3-2-90b-instruct-v1:0": { + "input_cost_per_token": 2e-06, + "litellm_provider": "bedrock", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 2e-06, + "supports_function_calling": true, + "supports_tool_choice": false, + "supports_vision": true + }, + "us.meta.llama3-3-70b-instruct-v1:0": { + 
"input_cost_per_token": 7.2e-07, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 7.2e-07, + "supports_function_calling": true, + "supports_tool_choice": false + }, + "us.meta.llama4-maverick-17b-instruct-v1:0": { + "input_cost_per_token": 2.4e-07, + "input_cost_per_token_batches": 1.2e-07, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 9.7e-07, + "output_cost_per_token_batches": 4.85e-07, + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text", + "code" + ], + "supports_function_calling": true, + "supports_tool_choice": false + }, + "us.meta.llama4-scout-17b-instruct-v1:0": { + "input_cost_per_token": 1.7e-07, + "input_cost_per_token_batches": 8.5e-08, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 6.6e-07, + "output_cost_per_token_batches": 3.3e-07, + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text", + "code" + ], + "supports_function_calling": true, + "supports_tool_choice": false + }, + "us.mistral.pixtral-large-2502-v1:0": { + "input_cost_per_token": 2e-06, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 6e-06, + "supports_function_calling": true, + "supports_tool_choice": false + }, + "v0/v0-1.0-md": { + "input_cost_per_token": 3e-06, + "litellm_provider": "v0", + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 1.5e-05, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + 
"supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "v0/v0-1.5-lg": { + "input_cost_per_token": 1.5e-05, + "litellm_provider": "v0", + "max_input_tokens": 512000, + "max_output_tokens": 512000, + "max_tokens": 512000, + "mode": "chat", + "output_cost_per_token": 7.5e-05, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "v0/v0-1.5-md": { + "input_cost_per_token": 3e-06, + "litellm_provider": "v0", + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 1.5e-05, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "vercel_ai_gateway/alibaba/qwen-3-14b": { + "input_cost_per_token": 8e-08, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 40960, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 2.4e-07 + }, + "vercel_ai_gateway/alibaba/qwen-3-235b": { + "input_cost_per_token": 2e-07, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 40960, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 6e-07 + }, + "vercel_ai_gateway/alibaba/qwen-3-30b": { + "input_cost_per_token": 1e-07, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 40960, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 3e-07 + }, + "vercel_ai_gateway/alibaba/qwen-3-32b": { + "input_cost_per_token": 1e-07, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 40960, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 3e-07, + "supports_function_calling": true, + "supports_tool_choice": 
true + }, + "vercel_ai_gateway/alibaba/qwen3-coder": { + "input_cost_per_token": 4e-07, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 262144, + "max_output_tokens": 66536, + "max_tokens": 66536, + "mode": "chat", + "output_cost_per_token": 1.6e-06, + "supports_function_calling": true, + "supports_tool_choice": true + }, + "vercel_ai_gateway/amazon/nova-lite": { + "input_cost_per_token": 6e-08, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 300000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 2.4e-07, + "supports_vision": true, + "supports_function_calling": true, + "supports_response_schema": true + }, + "vercel_ai_gateway/amazon/nova-micro": { + "input_cost_per_token": 3.5e-08, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 128000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 1.4e-07, + "supports_function_calling": true, + "supports_response_schema": true + }, + "vercel_ai_gateway/amazon/nova-pro": { + "input_cost_per_token": 8e-07, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 300000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 3.2e-06, + "supports_vision": true, + "supports_function_calling": true, + "supports_response_schema": true + }, + "vercel_ai_gateway/amazon/titan-embed-text-v2": { + "input_cost_per_token": 2e-08, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 0, + "max_output_tokens": 0, + "max_tokens": 0, + "mode": "chat", + "output_cost_per_token": 0.0 + }, + "vercel_ai_gateway/anthropic/claude-3-haiku": { + "cache_creation_input_token_cost": 3e-07, + "cache_read_input_token_cost": 3e-08, + "input_cost_per_token": 2.5e-07, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 200000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 1.25e-06, + 
"supports_vision": true, + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_response_schema": true + }, + "vercel_ai_gateway/anthropic/claude-3-opus": { + "cache_creation_input_token_cost": 1.875e-05, + "cache_read_input_token_cost": 1.5e-06, + "input_cost_per_token": 1.5e-05, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 200000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 7.5e-05, + "supports_vision": true, + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_response_schema": true + }, + "vercel_ai_gateway/anthropic/claude-3.5-haiku": { + "cache_creation_input_token_cost": 1e-06, + "cache_read_input_token_cost": 8e-08, + "input_cost_per_token": 8e-07, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 200000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 4e-06, + "supports_vision": true, + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_response_schema": true + }, + "vercel_ai_gateway/anthropic/claude-3.5-sonnet": { + "cache_creation_input_token_cost": 3.75e-06, + "cache_read_input_token_cost": 3e-07, + "input_cost_per_token": 3e-06, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 200000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 1.5e-05, + "supports_vision": true, + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_response_schema": true + }, + "vercel_ai_gateway/anthropic/claude-3.7-sonnet": { + "cache_creation_input_token_cost": 3.75e-06, + "cache_read_input_token_cost": 3e-07, + "input_cost_per_token": 3e-06, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 1.5e-05, + "supports_vision": true, + 
"supports_function_calling": true, + "supports_tool_choice": true, + "supports_response_schema": true + }, + "vercel_ai_gateway/anthropic/claude-4-opus": { + "cache_creation_input_token_cost": 1.875e-05, + "cache_read_input_token_cost": 1.5e-06, + "input_cost_per_token": 1.5e-05, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 200000, + "max_output_tokens": 32000, + "max_tokens": 32000, + "mode": "chat", + "output_cost_per_token": 7.5e-05, + "supports_vision": true, + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_response_schema": true + }, + "vercel_ai_gateway/anthropic/claude-4-sonnet": { + "cache_creation_input_token_cost": 3.75e-06, + "cache_read_input_token_cost": 3e-07, + "input_cost_per_token": 3e-06, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 1.5e-05, + "supports_function_calling": true, + "supports_tool_choice": true + }, + "vercel_ai_gateway/anthropic/claude-3-5-sonnet": { + "cache_creation_input_token_cost": 3.75e-06, + "cache_read_input_token_cost": 3e-07, + "input_cost_per_token": 3e-06, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 200000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 1.5e-05, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "vercel_ai_gateway/anthropic/claude-3-5-sonnet-20241022": { + "cache_creation_input_token_cost": 3.75e-06, + "cache_read_input_token_cost": 3e-07, + "input_cost_per_token": 3e-06, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 200000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 1.5e-05, + 
"supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "vercel_ai_gateway/anthropic/claude-3-7-sonnet": { + "cache_creation_input_token_cost": 3.75e-06, + "cache_read_input_token_cost": 3e-07, + "input_cost_per_token": 3e-06, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 1.5e-05, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "vercel_ai_gateway/anthropic/claude-haiku-4.5": { + "cache_creation_input_token_cost": 1.25e-06, + "cache_read_input_token_cost": 1e-07, + "input_cost_per_token": 1e-06, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 5e-06, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "vercel_ai_gateway/anthropic/claude-opus-4": { + "cache_creation_input_token_cost": 1.875e-05, + "cache_read_input_token_cost": 1.5e-06, + "input_cost_per_token": 1.5e-05, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 200000, + "max_output_tokens": 32000, + "max_tokens": 32000, + "mode": "chat", + "output_cost_per_token": 7.5e-05, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_prompt_caching": true, + 
"supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "vercel_ai_gateway/anthropic/claude-opus-4.1": { + "cache_creation_input_token_cost": 1.875e-05, + "cache_read_input_token_cost": 1.5e-06, + "input_cost_per_token": 1.5e-05, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 200000, + "max_output_tokens": 32000, + "max_tokens": 32000, + "mode": "chat", + "output_cost_per_token": 7.5e-05, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "vercel_ai_gateway/anthropic/claude-opus-4.5": { + "cache_creation_input_token_cost": 6.25e-06, + "cache_read_input_token_cost": 5e-07, + "input_cost_per_token": 5e-06, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 2.5e-05, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_minimal_reasoning_effort": true, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "vercel_ai_gateway/anthropic/claude-opus-4.6": { + "cache_creation_input_token_cost": 6.25e-06, + "cache_read_input_token_cost": 5e-07, + "input_cost_per_token": 5e-06, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 2.5e-05, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + 
"supports_tool_choice": true, + "supports_vision": true, + "supports_minimal_reasoning_effort": true + }, + "vercel_ai_gateway/anthropic/claude-sonnet-4": { + "cache_creation_input_token_cost": 3.75e-06, + "cache_read_input_token_cost": 3e-07, + "input_cost_per_token": 3e-06, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 1.5e-05, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "vercel_ai_gateway/anthropic/claude-sonnet-4.5": { + "cache_creation_input_token_cost": 3.75e-06, + "cache_read_input_token_cost": 3e-07, + "input_cost_per_token": 3e-06, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 1000000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 1.5e-05, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "vercel_ai_gateway/cohere/command-a": { + "input_cost_per_token": 2.5e-06, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 256000, + "max_output_tokens": 8000, + "max_tokens": 8000, + "mode": "chat", + "output_cost_per_token": 1e-05, + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_response_schema": true + }, + "vercel_ai_gateway/cohere/command-r": { + "input_cost_per_token": 1.5e-07, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 6e-07, + "supports_function_calling": true, + "supports_tool_choice": 
true + }, + "vercel_ai_gateway/cohere/command-r-plus": { + "input_cost_per_token": 2.5e-06, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 1e-05, + "supports_function_calling": true, + "supports_tool_choice": true + }, + "vercel_ai_gateway/cohere/embed-v4.0": { + "input_cost_per_token": 1.2e-07, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 0, + "max_output_tokens": 0, + "max_tokens": 0, + "mode": "chat", + "output_cost_per_token": 0.0 + }, + "vercel_ai_gateway/deepseek/deepseek-r1": { + "input_cost_per_token": 5.5e-07, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 128000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 2.19e-06, + "supports_tool_choice": true + }, + "vercel_ai_gateway/deepseek/deepseek-r1-distill-llama-70b": { + "input_cost_per_token": 7.5e-07, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 9.9e-07, + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_response_schema": true + }, + "vercel_ai_gateway/deepseek/deepseek-v3": { + "input_cost_per_token": 9e-07, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 128000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 9e-07, + "supports_tool_choice": true + }, + "vercel_ai_gateway/google/gemini-2.0-flash": { + "deprecation_date": "2026-06-01", + "input_cost_per_token": 1.5e-07, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 1048576, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 6e-07, + "supports_vision": true, + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_response_schema": true + }, + 
"vercel_ai_gateway/google/gemini-2.0-flash-lite": { + "deprecation_date": "2026-06-01", + "input_cost_per_token": 7.5e-08, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 1048576, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 3e-07, + "supports_vision": true, + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_response_schema": true + }, + "vercel_ai_gateway/google/gemini-2.5-flash": { + "input_cost_per_token": 3e-07, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 1000000, + "max_output_tokens": 65536, + "max_tokens": 65536, + "mode": "chat", + "output_cost_per_token": 2.5e-06, + "supports_vision": true, + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_response_schema": true + }, + "vercel_ai_gateway/google/gemini-2.5-pro": { + "input_cost_per_token": 2.5e-06, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 1048576, + "max_output_tokens": 65536, + "max_tokens": 65536, + "mode": "chat", + "output_cost_per_token": 1e-05, + "supports_vision": true, + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_response_schema": true + }, + "vercel_ai_gateway/google/gemini-embedding-001": { + "input_cost_per_token": 1.5e-07, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 0, + "max_output_tokens": 0, + "max_tokens": 0, + "mode": "embedding", + "output_cost_per_token": 0.0 + }, + "vercel_ai_gateway/google/gemma-2-9b": { + "input_cost_per_token": 2e-07, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 2e-07, + "supports_vision": true, + "supports_function_calling": true, + "supports_tool_choice": true + }, + "vercel_ai_gateway/google/text-embedding-005": { + "input_cost_per_token": 2.5e-08, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 0, + 
"max_output_tokens": 0, + "max_tokens": 0, + "mode": "embedding", + "output_cost_per_token": 0.0 + }, + "vercel_ai_gateway/google/text-multilingual-embedding-002": { + "input_cost_per_token": 2.5e-08, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 0, + "max_output_tokens": 0, + "max_tokens": 0, + "mode": "embedding", + "output_cost_per_token": 0.0 + }, + "vercel_ai_gateway/inception/mercury-coder-small": { + "input_cost_per_token": 2.5e-07, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 32000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 1e-06 + }, + "vercel_ai_gateway/meta/llama-3-70b": { + "input_cost_per_token": 5.9e-07, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 7.9e-07, + "supports_tool_choice": true + }, + "vercel_ai_gateway/meta/llama-3-8b": { + "input_cost_per_token": 5e-08, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 8e-08, + "supports_tool_choice": true + }, + "vercel_ai_gateway/meta/llama-3.1-70b": { + "input_cost_per_token": 7.2e-07, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 128000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 7.2e-07, + "supports_tool_choice": true + }, + "vercel_ai_gateway/meta/llama-3.1-8b": { + "input_cost_per_token": 5e-08, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 131000, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 8e-08, + "supports_function_calling": true, + "supports_response_schema": true + }, + "vercel_ai_gateway/meta/llama-3.2-11b": { + "input_cost_per_token": 1.6e-07, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 128000, + 
"max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 1.6e-07, + "supports_vision": true, + "supports_function_calling": true, + "supports_tool_choice": true + }, + "vercel_ai_gateway/meta/llama-3.2-1b": { + "input_cost_per_token": 1e-07, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 128000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 1e-07 + }, + "vercel_ai_gateway/meta/llama-3.2-3b": { + "input_cost_per_token": 1.5e-07, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 128000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 1.5e-07, + "supports_function_calling": true, + "supports_response_schema": true + }, + "vercel_ai_gateway/meta/llama-3.2-90b": { + "input_cost_per_token": 7.2e-07, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 128000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 7.2e-07, + "supports_vision": true, + "supports_function_calling": true, + "supports_tool_choice": true + }, + "vercel_ai_gateway/meta/llama-3.3-70b": { + "input_cost_per_token": 7.2e-07, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 128000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 7.2e-07, + "supports_function_calling": true, + "supports_tool_choice": true + }, + "vercel_ai_gateway/meta/llama-4-maverick": { + "input_cost_per_token": 2e-07, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 131072, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 6e-07, + "supports_tool_choice": true + }, + "vercel_ai_gateway/meta/llama-4-scout": { + "input_cost_per_token": 1e-07, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 131072, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + 
"output_cost_per_token": 3e-07, + "supports_vision": true, + "supports_function_calling": true, + "supports_tool_choice": true + }, + "vercel_ai_gateway/mistral/codestral": { + "input_cost_per_token": 3e-07, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 256000, + "max_output_tokens": 4000, + "max_tokens": 4000, + "mode": "chat", + "output_cost_per_token": 9e-07, + "supports_function_calling": true, + "supports_tool_choice": true + }, + "vercel_ai_gateway/mistral/codestral-embed": { + "input_cost_per_token": 1.5e-07, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 0, + "max_output_tokens": 0, + "max_tokens": 0, + "mode": "chat", + "output_cost_per_token": 0.0 + }, + "vercel_ai_gateway/mistral/devstral-small": { + "input_cost_per_token": 7e-08, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 2.8e-07, + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_response_schema": true + }, + "vercel_ai_gateway/mistral/magistral-medium": { + "input_cost_per_token": 2e-06, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 128000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 5e-06, + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_response_schema": true + }, + "vercel_ai_gateway/mistral/magistral-small": { + "input_cost_per_token": 5e-07, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 128000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 1.5e-06, + "supports_function_calling": true + }, + "vercel_ai_gateway/mistral/ministral-3b": { + "input_cost_per_token": 4e-08, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 128000, + "max_output_tokens": 4000, + "max_tokens": 4000, + "mode": "chat", + 
"output_cost_per_token": 4e-08, + "supports_function_calling": true, + "supports_tool_choice": true + }, + "vercel_ai_gateway/mistral/ministral-8b": { + "input_cost_per_token": 1e-07, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 128000, + "max_output_tokens": 4000, + "max_tokens": 4000, + "mode": "chat", + "output_cost_per_token": 1e-07, + "supports_vision": true, + "supports_function_calling": true, + "supports_tool_choice": true + }, + "vercel_ai_gateway/mistral/mistral-embed": { + "input_cost_per_token": 1e-07, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 0, + "max_output_tokens": 0, + "max_tokens": 0, + "mode": "chat", + "output_cost_per_token": 0.0 + }, + "vercel_ai_gateway/mistral/mistral-large": { + "input_cost_per_token": 2e-06, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 32000, + "max_output_tokens": 4000, + "max_tokens": 4000, + "mode": "chat", + "output_cost_per_token": 6e-06, + "supports_function_calling": true, + "supports_tool_choice": true + }, + "vercel_ai_gateway/mistral/mistral-saba-24b": { + "input_cost_per_token": 7.9e-07, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 7.9e-07 + }, + "vercel_ai_gateway/mistral/mistral-small": { + "input_cost_per_token": 1e-07, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 32000, + "max_output_tokens": 4000, + "max_tokens": 4000, + "mode": "chat", + "output_cost_per_token": 3e-07, + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_response_schema": true + }, + "vercel_ai_gateway/mistral/mixtral-8x22b-instruct": { + "input_cost_per_token": 1.2e-06, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 65536, + "max_output_tokens": 2048, + "max_tokens": 2048, + "mode": "chat", + "output_cost_per_token": 1.2e-06, + "supports_function_calling": true + }, + 
"vercel_ai_gateway/mistral/pixtral-12b": { + "input_cost_per_token": 1.5e-07, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 128000, + "max_output_tokens": 4000, + "max_tokens": 4000, + "mode": "chat", + "output_cost_per_token": 1.5e-07, + "supports_vision": true, + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_response_schema": true + }, + "vercel_ai_gateway/mistral/pixtral-large": { + "input_cost_per_token": 2e-06, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 128000, + "max_output_tokens": 4000, + "max_tokens": 4000, + "mode": "chat", + "output_cost_per_token": 6e-06, + "supports_vision": true, + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_response_schema": true + }, + "vercel_ai_gateway/moonshotai/kimi-k2": { + "input_cost_per_token": 5.5e-07, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 131072, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 2.2e-06, + "supports_function_calling": true, + "supports_tool_choice": true + }, + "vercel_ai_gateway/morph/morph-v3-fast": { + "input_cost_per_token": 8e-07, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 32768, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 1.2e-06 + }, + "vercel_ai_gateway/morph/morph-v3-large": { + "input_cost_per_token": 9e-07, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 32768, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 1.9e-06 + }, + "vercel_ai_gateway/openai/gpt-3.5-turbo": { + "input_cost_per_token": 5e-07, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 16385, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 1.5e-06, + "supports_function_calling": true, + "supports_tool_choice": true + }, + 
"vercel_ai_gateway/openai/gpt-3.5-turbo-instruct": { + "input_cost_per_token": 1.5e-06, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 8192, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 2e-06 + }, + "vercel_ai_gateway/openai/gpt-4-turbo": { + "input_cost_per_token": 1e-05, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 3e-05, + "supports_vision": true, + "supports_function_calling": true, + "supports_tool_choice": true + }, + "vercel_ai_gateway/openai/gpt-4.1": { + "cache_creation_input_token_cost": 0.0, + "cache_read_input_token_cost": 5e-07, + "input_cost_per_token": 2e-06, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 1047576, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 8e-06, + "supports_vision": true, + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_response_schema": true + }, + "vercel_ai_gateway/openai/gpt-4.1-mini": { + "cache_creation_input_token_cost": 0.0, + "cache_read_input_token_cost": 1e-07, + "input_cost_per_token": 4e-07, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 1047576, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 1.6e-06, + "supports_vision": true, + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_response_schema": true + }, + "vercel_ai_gateway/openai/gpt-4.1-nano": { + "cache_creation_input_token_cost": 0.0, + "cache_read_input_token_cost": 2.5e-08, + "input_cost_per_token": 1e-07, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 1047576, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 4e-07, + "supports_vision": true, + "supports_function_calling": true, + "supports_tool_choice": 
true, + "supports_response_schema": true + }, + "vercel_ai_gateway/openai/gpt-4o": { + "cache_creation_input_token_cost": 0.0, + "cache_read_input_token_cost": 1.25e-06, + "input_cost_per_token": 2.5e-06, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 1e-05, + "supports_vision": true, + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_response_schema": true + }, + "vercel_ai_gateway/openai/gpt-4o-mini": { + "cache_creation_input_token_cost": 0.0, + "cache_read_input_token_cost": 7.5e-08, + "input_cost_per_token": 1.5e-07, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 6e-07, + "supports_vision": true, + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_response_schema": true + }, + "vercel_ai_gateway/openai/o1": { + "cache_creation_input_token_cost": 0.0, + "cache_read_input_token_cost": 7.5e-06, + "input_cost_per_token": 1.5e-05, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 200000, + "max_output_tokens": 100000, + "max_tokens": 100000, + "mode": "chat", + "output_cost_per_token": 6e-05, + "supports_vision": true, + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_response_schema": true + }, + "vercel_ai_gateway/openai/o3": { + "cache_creation_input_token_cost": 0.0, + "cache_read_input_token_cost": 5e-07, + "input_cost_per_token": 2e-06, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 200000, + "max_output_tokens": 100000, + "max_tokens": 100000, + "mode": "chat", + "output_cost_per_token": 8e-06, + "supports_vision": true, + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_response_schema": true + }, + "vercel_ai_gateway/openai/o3-mini": { + 
"cache_creation_input_token_cost": 0.0, + "cache_read_input_token_cost": 5.5e-07, + "input_cost_per_token": 1.1e-06, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 200000, + "max_output_tokens": 100000, + "max_tokens": 100000, + "mode": "chat", + "output_cost_per_token": 4.4e-06, + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_response_schema": true + }, + "vercel_ai_gateway/openai/o4-mini": { + "cache_creation_input_token_cost": 0.0, + "cache_read_input_token_cost": 2.75e-07, + "input_cost_per_token": 1.1e-06, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 200000, + "max_output_tokens": 100000, + "max_tokens": 100000, + "mode": "chat", + "output_cost_per_token": 4.4e-06, + "supports_vision": true, + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_response_schema": true + }, + "vercel_ai_gateway/openai/text-embedding-3-large": { + "input_cost_per_token": 1.3e-07, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 0, + "max_output_tokens": 0, + "max_tokens": 0, + "mode": "embedding", + "output_cost_per_token": 0.0 + }, + "vercel_ai_gateway/openai/text-embedding-3-small": { + "input_cost_per_token": 2e-08, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 0, + "max_output_tokens": 0, + "max_tokens": 0, + "mode": "embedding", + "output_cost_per_token": 0.0 + }, + "vercel_ai_gateway/openai/text-embedding-ada-002": { + "input_cost_per_token": 1e-07, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 0, + "max_output_tokens": 0, + "max_tokens": 0, + "mode": "embedding", + "output_cost_per_token": 0.0 + }, + "vercel_ai_gateway/perplexity/sonar": { + "input_cost_per_token": 1e-06, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 127000, + "max_output_tokens": 8000, + "max_tokens": 8000, + "mode": "chat", + "output_cost_per_token": 1e-06 + }, + "vercel_ai_gateway/perplexity/sonar-pro": { + "input_cost_per_token": 
3e-06, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 200000, + "max_output_tokens": 8000, + "max_tokens": 8000, + "mode": "chat", + "output_cost_per_token": 1.5e-05 + }, + "vercel_ai_gateway/perplexity/sonar-reasoning": { + "input_cost_per_token": 1e-06, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 127000, + "max_output_tokens": 8000, + "max_tokens": 8000, + "mode": "chat", + "output_cost_per_token": 5e-06 + }, + "vercel_ai_gateway/perplexity/sonar-reasoning-pro": { + "input_cost_per_token": 2e-06, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 127000, + "max_output_tokens": 8000, + "max_tokens": 8000, + "mode": "chat", + "output_cost_per_token": 8e-06 + }, + "vercel_ai_gateway/vercel/v0-1.0-md": { + "input_cost_per_token": 3e-06, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 128000, + "max_output_tokens": 32000, + "max_tokens": 32000, + "mode": "chat", + "output_cost_per_token": 1.5e-05, + "supports_vision": true, + "supports_function_calling": true, + "supports_tool_choice": true + }, + "vercel_ai_gateway/vercel/v0-1.5-md": { + "input_cost_per_token": 3e-06, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 128000, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 1.5e-05, + "supports_vision": true, + "supports_function_calling": true, + "supports_tool_choice": true + }, + "vercel_ai_gateway/xai/grok-2": { + "input_cost_per_token": 2e-06, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 131072, + "max_output_tokens": 4000, + "max_tokens": 4000, + "mode": "chat", + "output_cost_per_token": 1e-05, + "supports_function_calling": true, + "supports_tool_choice": true + }, + "vercel_ai_gateway/xai/grok-2-vision": { + "input_cost_per_token": 2e-06, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 
1e-05, + "supports_vision": true, + "supports_function_calling": true, + "supports_tool_choice": true + }, + "vercel_ai_gateway/xai/grok-3": { + "input_cost_per_token": 3e-06, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 1.5e-05, + "supports_function_calling": true, + "supports_tool_choice": true + }, + "vercel_ai_gateway/xai/grok-3-fast": { + "input_cost_per_token": 5e-06, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 2.5e-05, + "supports_function_calling": true + }, + "vercel_ai_gateway/xai/grok-3-mini": { + "input_cost_per_token": 3e-07, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 5e-07, + "supports_function_calling": true, + "supports_tool_choice": true + }, + "vercel_ai_gateway/xai/grok-3-mini-fast": { + "input_cost_per_token": 6e-07, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 4e-06, + "supports_function_calling": true, + "supports_tool_choice": true + }, + "vercel_ai_gateway/xai/grok-4": { + "input_cost_per_token": 3e-06, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 256000, + "max_output_tokens": 256000, + "max_tokens": 256000, + "mode": "chat", + "output_cost_per_token": 1.5e-05, + "supports_function_calling": true, + "supports_tool_choice": true + }, + "vercel_ai_gateway/zai/glm-4.5": { + "input_cost_per_token": 6e-07, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 2.2e-06, + "supports_function_calling": true, 
+ "supports_tool_choice": true + }, + "vercel_ai_gateway/zai/glm-4.5-air": { + "input_cost_per_token": 2e-07, + "litellm_provider": "vercel_ai_gateway", + "max_input_tokens": 128000, + "max_output_tokens": 96000, + "max_tokens": 96000, + "mode": "chat", + "output_cost_per_token": 1.1e-06, + "supports_function_calling": true, + "supports_tool_choice": true + }, + "vercel_ai_gateway/zai/glm-4.6": { + "litellm_provider": "vercel_ai_gateway", + "cache_read_input_token_cost": 1.1e-07, + "input_cost_per_token": 4.5e-07, + "max_input_tokens": 200000, + "max_output_tokens": 200000, + "max_tokens": 200000, + "mode": "chat", + "output_cost_per_token": 1.8e-06, + "source": "https://vercel.com/ai-gateway/models/glm-4.6", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true + }, + "vertex_ai/chirp": { + "input_cost_per_character": 3e-05, + "litellm_provider": "vertex_ai", + "mode": "audio_speech", + "source": "https://cloud.google.com/text-to-speech/pricing", + "supported_endpoints": [ + "/v1/audio/speech" + ] + }, + "vertex_ai/claude-3-5-haiku": { + "input_cost_per_token": 1e-06, + "litellm_provider": "vertex_ai-anthropic_models", + "max_input_tokens": 200000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 5e-06, + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_tool_choice": true + }, + "vertex_ai/claude-3-5-haiku@20241022": { + "input_cost_per_token": 1e-06, + "litellm_provider": "vertex_ai-anthropic_models", + "max_input_tokens": 200000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 5e-06, + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_tool_choice": true + }, + "vertex_ai/claude-haiku-4-5": { + "cache_creation_input_token_cost": 1.25e-06, + "cache_read_input_token_cost": 1e-07, + 
"input_cost_per_token": 1e-06, + "litellm_provider": "vertex_ai-anthropic_models", + "max_input_tokens": 200000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 5e-06, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/partner-models/claude/haiku-4-5", + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_native_streaming": true, + "supports_vision": true + }, + "vertex_ai/claude-haiku-4-5@20251001": { + "cache_creation_input_token_cost": 1.25e-06, + "cache_read_input_token_cost": 1e-07, + "input_cost_per_token": 1e-06, + "litellm_provider": "vertex_ai-anthropic_models", + "max_input_tokens": 200000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 5e-06, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/partner-models/claude/haiku-4-5", + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_native_streaming": true, + "supports_vision": true + }, + "vertex_ai/claude-3-5-sonnet": { + "input_cost_per_token": 3e-06, + "litellm_provider": "vertex_ai-anthropic_models", + "max_input_tokens": 200000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 1.5e-05, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "vertex_ai/claude-3-5-sonnet@20240620": { + "input_cost_per_token": 3e-06, + "litellm_provider": "vertex_ai-anthropic_models", + "max_input_tokens": 200000, + 
"max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 1.5e-05, + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "vertex_ai/claude-3-7-sonnet@20250219": { + "cache_creation_input_token_cost": 3.75e-06, + "cache_read_input_token_cost": 3e-07, + "deprecation_date": "2026-05-11", + "input_cost_per_token": 3e-06, + "litellm_provider": "vertex_ai-anthropic_models", + "max_input_tokens": 200000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 1.5e-05, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 159 + }, + "vertex_ai/claude-3-haiku": { + "input_cost_per_token": 2.5e-07, + "litellm_provider": "vertex_ai-anthropic_models", + "max_input_tokens": 200000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 1.25e-06, + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "vertex_ai/claude-3-haiku@20240307": { + "input_cost_per_token": 2.5e-07, + "litellm_provider": "vertex_ai-anthropic_models", + "max_input_tokens": 200000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 1.25e-06, + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "vertex_ai/claude-3-opus": { + "input_cost_per_token": 1.5e-05, + "litellm_provider": "vertex_ai-anthropic_models", + "max_input_tokens": 200000, + "max_output_tokens": 4096, + "max_tokens": 4096, + 
"mode": "chat", + "output_cost_per_token": 7.5e-05, + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "vertex_ai/claude-3-opus@20240229": { + "input_cost_per_token": 1.5e-05, + "litellm_provider": "vertex_ai-anthropic_models", + "max_input_tokens": 200000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 7.5e-05, + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "vertex_ai/claude-3-sonnet": { + "input_cost_per_token": 3e-06, + "litellm_provider": "vertex_ai-anthropic_models", + "max_input_tokens": 200000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 1.5e-05, + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "vertex_ai/claude-3-sonnet@20240229": { + "input_cost_per_token": 3e-06, + "litellm_provider": "vertex_ai-anthropic_models", + "max_input_tokens": 200000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 1.5e-05, + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "vertex_ai/claude-opus-4": { + "cache_creation_input_token_cost": 1.875e-05, + "cache_read_input_token_cost": 1.5e-06, + "input_cost_per_token": 1.5e-05, + "litellm_provider": "vertex_ai-anthropic_models", + "max_input_tokens": 200000, + "max_output_tokens": 32000, + "max_tokens": 32000, + "mode": "chat", + "output_cost_per_token": 7.5e-05, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + 
"supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 159 + }, + "vertex_ai/claude-opus-4-1": { + "cache_creation_input_token_cost": 1.875e-05, + "cache_read_input_token_cost": 1.5e-06, + "input_cost_per_token": 1.5e-05, + "input_cost_per_token_batches": 7.5e-06, + "litellm_provider": "vertex_ai-anthropic_models", + "max_input_tokens": 200000, + "max_output_tokens": 32000, + "max_tokens": 32000, + "mode": "chat", + "output_cost_per_token": 7.5e-05, + "output_cost_per_token_batches": 3.75e-05, + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "vertex_ai/claude-opus-4-1@20250805": { + "cache_creation_input_token_cost": 1.875e-05, + "cache_read_input_token_cost": 1.5e-06, + "input_cost_per_token": 1.5e-05, + "input_cost_per_token_batches": 7.5e-06, + "litellm_provider": "vertex_ai-anthropic_models", + "max_input_tokens": 200000, + "max_output_tokens": 32000, + "max_tokens": 32000, + "mode": "chat", + "output_cost_per_token": 7.5e-05, + "output_cost_per_token_batches": 3.75e-05, + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "vertex_ai/claude-opus-4-5": { + "cache_creation_input_token_cost": 6.25e-06, + "cache_read_input_token_cost": 5e-07, + "input_cost_per_token": 5e-06, + "litellm_provider": "vertex_ai-anthropic_models", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 2.5e-05, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + 
"supports_minimal_reasoning_effort": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 159 + }, + "vertex_ai/claude-opus-4-5@20251101": { + "cache_creation_input_token_cost": 6.25e-06, + "cache_read_input_token_cost": 5e-07, + "input_cost_per_token": 5e-06, + "litellm_provider": "vertex_ai-anthropic_models", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 2.5e-05, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_minimal_reasoning_effort": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 159, + "supports_native_streaming": true + }, + "vertex_ai/claude-opus-4-6": { + "cache_creation_input_token_cost": 6.25e-06, + "cache_read_input_token_cost": 5e-07, + "input_cost_per_token": 5e-06, + "litellm_provider": "vertex_ai-anthropic_models", + "max_input_tokens": 1000000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 2.5e-05, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": false, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + 
"tool_use_system_prompt_tokens": 346, + "supports_max_reasoning_effort": true, + "supports_minimal_reasoning_effort": true + }, + "vertex_ai/claude-opus-4-6@default": { + "cache_creation_input_token_cost": 6.25e-06, + "cache_read_input_token_cost": 5e-07, + "input_cost_per_token": 5e-06, + "litellm_provider": "vertex_ai-anthropic_models", + "max_input_tokens": 1000000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 2.5e-05, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": false, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 346, + "supports_max_reasoning_effort": true, + "supports_minimal_reasoning_effort": true + }, + "vertex_ai/claude-opus-4-7": { + "cache_creation_input_token_cost": 6.25e-06, + "cache_read_input_token_cost": 5e-07, + "input_cost_per_token": 5e-06, + "litellm_provider": "vertex_ai-anthropic_models", + "max_input_tokens": 1000000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 2.5e-05, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": false, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_xhigh_reasoning_effort": true, + "tool_use_system_prompt_tokens": 346, + "supports_max_reasoning_effort": true, + 
"supports_minimal_reasoning_effort": true + }, + "vertex_ai/claude-opus-4-7@default": { + "cache_creation_input_token_cost": 6.25e-06, + "cache_read_input_token_cost": 5e-07, + "input_cost_per_token": 5e-06, + "litellm_provider": "vertex_ai-anthropic_models", + "max_input_tokens": 1000000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 2.5e-05, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": false, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_xhigh_reasoning_effort": true, + "tool_use_system_prompt_tokens": 346, + "supports_max_reasoning_effort": true, + "supports_minimal_reasoning_effort": true + }, + "vertex_ai/claude-sonnet-4-5": { + "cache_creation_input_token_cost": 3.75e-06, + "cache_read_input_token_cost": 3e-07, + "input_cost_per_token": 3e-06, + "input_cost_per_token_above_200k_tokens": 6e-06, + "output_cost_per_token_above_200k_tokens": 2.25e-05, + "cache_creation_input_token_cost_above_200k_tokens": 7.5e-06, + "cache_read_input_token_cost_above_200k_tokens": 6e-07, + "input_cost_per_token_batches": 1.5e-06, + "litellm_provider": "vertex_ai-anthropic_models", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 1.5e-05, + "output_cost_per_token_batches": 7.5e-06, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + 
"vertex_ai/claude-sonnet-4-6": { + "cache_creation_input_token_cost": 3.75e-06, + "cache_read_input_token_cost": 3e-07, + "input_cost_per_token": 3e-06, + "litellm_provider": "vertex_ai-anthropic_models", + "max_input_tokens": 1000000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 1.5e-05, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_max_reasoning_effort": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 346, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_minimal_reasoning_effort": true + }, + "vertex_ai/claude-sonnet-4-5@20250929": { + "cache_creation_input_token_cost": 3.75e-06, + "cache_read_input_token_cost": 3e-07, + "input_cost_per_token": 3e-06, + "input_cost_per_token_above_200k_tokens": 6e-06, + "output_cost_per_token_above_200k_tokens": 2.25e-05, + "cache_creation_input_token_cost_above_200k_tokens": 7.5e-06, + "cache_read_input_token_cost_above_200k_tokens": 6e-07, + "input_cost_per_token_batches": 1.5e-06, + "litellm_provider": "vertex_ai-anthropic_models", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 1.5e-05, + "output_cost_per_token_batches": 7.5e-06, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_native_streaming": true + }, + "vertex_ai/claude-opus-4@20250514": { + 
"cache_creation_input_token_cost": 1.875e-05, + "cache_read_input_token_cost": 1.5e-06, + "input_cost_per_token": 1.5e-05, + "litellm_provider": "vertex_ai-anthropic_models", + "max_input_tokens": 200000, + "max_output_tokens": 32000, + "max_tokens": 32000, + "mode": "chat", + "output_cost_per_token": 7.5e-05, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 159 + }, + "vertex_ai/claude-sonnet-4": { + "cache_creation_input_token_cost": 3.75e-06, + "cache_read_input_token_cost": 3e-07, + "input_cost_per_token": 3e-06, + "input_cost_per_token_above_200k_tokens": 6e-06, + "output_cost_per_token_above_200k_tokens": 2.25e-05, + "cache_creation_input_token_cost_above_200k_tokens": 7.5e-06, + "cache_read_input_token_cost_above_200k_tokens": 6e-07, + "litellm_provider": "vertex_ai-anthropic_models", + "max_input_tokens": 1000000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 1.5e-05, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 159 + }, + "vertex_ai/claude-sonnet-4@20250514": { + "cache_creation_input_token_cost": 3.75e-06, + "cache_read_input_token_cost": 3e-07, + 
"input_cost_per_token": 3e-06, + "input_cost_per_token_above_200k_tokens": 6e-06, + "output_cost_per_token_above_200k_tokens": 2.25e-05, + "cache_creation_input_token_cost_above_200k_tokens": 7.5e-06, + "cache_read_input_token_cost_above_200k_tokens": 6e-07, + "litellm_provider": "vertex_ai-anthropic_models", + "max_input_tokens": 1000000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 1.5e-05, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 159 + }, + "vertex_ai/mistralai/codestral-2@001": { + "input_cost_per_token": 3e-07, + "litellm_provider": "vertex_ai-mistral_models", + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 9e-07, + "supports_function_calling": true, + "supports_tool_choice": true + }, + "vertex_ai/codestral-2": { + "input_cost_per_token": 3e-07, + "litellm_provider": "vertex_ai-mistral_models", + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 9e-07, + "supports_function_calling": true, + "supports_tool_choice": true + }, + "vertex_ai/codestral-2@001": { + "input_cost_per_token": 3e-07, + "litellm_provider": "vertex_ai-mistral_models", + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 9e-07, + "supports_function_calling": true, + "supports_tool_choice": true + }, + "vertex_ai/mistralai/codestral-2": { + "input_cost_per_token": 3e-07, + 
"litellm_provider": "vertex_ai-mistral_models", + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 9e-07, + "supports_function_calling": true, + "supports_tool_choice": true + }, + "vertex_ai/codestral-2501": { + "input_cost_per_token": 2e-07, + "litellm_provider": "vertex_ai-mistral_models", + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 6e-07, + "supports_function_calling": true, + "supports_tool_choice": true + }, + "vertex_ai/codestral@2405": { + "input_cost_per_token": 2e-07, + "litellm_provider": "vertex_ai-mistral_models", + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 6e-07, + "supports_function_calling": true, + "supports_tool_choice": true + }, + "vertex_ai/codestral@latest": { + "input_cost_per_token": 2e-07, + "litellm_provider": "vertex_ai-mistral_models", + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 6e-07, + "supports_function_calling": true, + "supports_tool_choice": true + }, + "vertex_ai/deepseek-ai/deepseek-v3.1-maas": { + "input_cost_per_token": 1.35e-06, + "litellm_provider": "vertex_ai-deepseek_models", + "max_input_tokens": 163840, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 5.4e-06, + "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#partner-models", + "supported_regions": [ + "us-central1" + ], + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, + "vertex_ai/deepseek-ai/deepseek-v3.2-maas": { + "input_cost_per_token": 5.6e-07, + "input_cost_per_token_batches": 2.8e-07, + "litellm_provider": "vertex_ai-deepseek_models", + 
"max_input_tokens": 163840, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 1.68e-06, + "output_cost_per_token_batches": 8.4e-07, + "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#partner-models", + "supported_regions": [ + "global" + ], + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, + "vertex_ai/deepseek-ai/deepseek-r1-0528-maas": { + "input_cost_per_token": 1.35e-06, + "litellm_provider": "vertex_ai-deepseek_models", + "max_input_tokens": 65336, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 5.4e-06, + "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#partner-models", + "supported_regions": [ + "us-central1" + ], + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, + "vertex_ai/gemini-2.5-flash-image": { + "cache_read_input_token_cost": 3e-08, + "input_cost_per_audio_token": 1e-06, + "input_cost_per_token": 3e-07, + "litellm_provider": "vertex_ai-language-models", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "max_tokens": 32768, + "max_pdf_size_mb": 30, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "image_generation", + "output_cost_per_image": 0.039, + "output_cost_per_image_token": 3e-05, + "output_cost_per_reasoning_token": 2.5e-06, + "output_cost_per_token": 2.5e-06, + "rpm": 100000, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal/image-generation#edit-an-image", + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image", + "audio", + 
"video" + ], + "supported_output_modalities": [ + "text", + "image" + ], + "supports_audio_output": false, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_url_context": true, + "supports_vision": true, + "supports_web_search": false, + "tpm": 8000000 + }, + "vertex_ai/gemini-3-pro-image-preview": { + "input_cost_per_image": 0.0011, + "input_cost_per_token": 2e-06, + "input_cost_per_token_batches": 1e-06, + "litellm_provider": "vertex_ai-language-models", + "max_input_tokens": 65536, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "image_generation", + "output_cost_per_image": 0.134, + "output_cost_per_image_token": 0.00012, + "output_cost_per_token": 1.2e-05, + "output_cost_per_token_batches": 6e-06, + "source": "https://docs.cloud.google.com/vertex-ai/generative-ai/docs/models/gemini/3-pro-image" + }, + "vertex_ai/gemini-3.1-flash-image-preview": { + "input_cost_per_image": 0.00056, + "input_cost_per_token": 5e-07, + "litellm_provider": "vertex_ai-language-models", + "max_input_tokens": 65536, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "image_generation", + "output_cost_per_image": 0.0672, + "output_cost_per_image_token": 6e-05, + "output_cost_per_token": 3e-06, + "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#gemini-models" + }, + "vertex_ai/gemini-3.1-flash-lite-preview": { + "cache_read_input_token_cost": 2.5e-08, + "cache_read_input_token_cost_per_audio_token": 5e-08, + "input_cost_per_audio_token": 5e-07, + "input_cost_per_token": 2.5e-07, + "litellm_provider": "vertex_ai-language-models", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 65536, + "max_pdf_size_mb": 30, + "max_tokens": 65536, + 
"max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_reasoning_token": 1.5e-06, + "output_cost_per_token": 1.5e-06, + "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#gemini-models", + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], + "supports_audio_input": true, + "supports_audio_output": false, + "supports_code_execution": true, + "supports_file_search": true, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_url_context": true, + "supports_video_input": true, + "supports_vision": true, + "supports_web_search": true, + "supports_native_streaming": true, + "search_context_cost_per_query": { + "search_context_size_low": 0.014, + "search_context_size_medium": 0.014, + "search_context_size_high": 0.014 + }, + "web_search_billing_unit": "per_query" + }, + "vertex_ai/deep-research-pro-preview-12-2025": { + "input_cost_per_image": 0.0011, + "input_cost_per_token": 2e-06, + "input_cost_per_token_batches": 1e-06, + "litellm_provider": "vertex_ai-language-models", + "max_input_tokens": 65536, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "image_generation", + "output_cost_per_image": 0.134, + "output_cost_per_image_token": 0.00012, + "output_cost_per_token": 1.2e-05, + "output_cost_per_token_batches": 6e-06, + "source": "https://docs.cloud.google.com/vertex-ai/generative-ai/docs/models/gemini/3-pro-image" + }, + "vertex_ai/imagegeneration@006": { + "litellm_provider": "vertex_ai-image-models", + "mode": "image_generation", + "output_cost_per_image": 0.02, + "source": 
"https://cloud.google.com/vertex-ai/generative-ai/pricing" + }, + "vertex_ai/imagen-3.0-fast-generate-001": { + "litellm_provider": "vertex_ai-image-models", + "mode": "image_generation", + "output_cost_per_image": 0.02, + "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing" + }, + "vertex_ai/imagen-3.0-generate-001": { + "litellm_provider": "vertex_ai-image-models", + "mode": "image_generation", + "output_cost_per_image": 0.04, + "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing" + }, + "vertex_ai/imagen-3.0-generate-002": { + "deprecation_date": "2025-11-10", + "litellm_provider": "vertex_ai-image-models", + "mode": "image_generation", + "output_cost_per_image": 0.04, + "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing" + }, + "vertex_ai/imagen-3.0-capability-001": { + "litellm_provider": "vertex_ai-image-models", + "mode": "image_generation", + "output_cost_per_image": 0.04, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/image/edit-insert-objects" + }, + "vertex_ai/imagen-4.0-fast-generate-001": { + "litellm_provider": "vertex_ai-image-models", + "mode": "image_generation", + "output_cost_per_image": 0.02, + "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing" + }, + "vertex_ai/imagen-4.0-generate-001": { + "litellm_provider": "vertex_ai-image-models", + "mode": "image_generation", + "output_cost_per_image": 0.04, + "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing" + }, + "vertex_ai/imagen-4.0-ultra-generate-001": { + "litellm_provider": "vertex_ai-image-models", + "mode": "image_generation", + "output_cost_per_image": 0.06, + "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing" + }, + "vertex_ai/jamba-1.5": { + "input_cost_per_token": 2e-07, + "litellm_provider": "vertex_ai-ai21_models", + "max_input_tokens": 256000, + "max_output_tokens": 256000, + "max_tokens": 256000, + "mode": "chat", + "output_cost_per_token": 4e-07, + 
"supports_tool_choice": true + }, + "vertex_ai/jamba-1.5-large": { + "input_cost_per_token": 2e-06, + "litellm_provider": "vertex_ai-ai21_models", + "max_input_tokens": 256000, + "max_output_tokens": 256000, + "max_tokens": 256000, + "mode": "chat", + "output_cost_per_token": 8e-06, + "supports_tool_choice": true + }, + "vertex_ai/jamba-1.5-large@001": { + "input_cost_per_token": 2e-06, + "litellm_provider": "vertex_ai-ai21_models", + "max_input_tokens": 256000, + "max_output_tokens": 256000, + "max_tokens": 256000, + "mode": "chat", + "output_cost_per_token": 8e-06, + "supports_tool_choice": true + }, + "vertex_ai/jamba-1.5-mini": { + "input_cost_per_token": 2e-07, + "litellm_provider": "vertex_ai-ai21_models", + "max_input_tokens": 256000, + "max_output_tokens": 256000, + "max_tokens": 256000, + "mode": "chat", + "output_cost_per_token": 4e-07, + "supports_tool_choice": true + }, + "vertex_ai/jamba-1.5-mini@001": { + "input_cost_per_token": 2e-07, + "litellm_provider": "vertex_ai-ai21_models", + "max_input_tokens": 256000, + "max_output_tokens": 256000, + "max_tokens": 256000, + "mode": "chat", + "output_cost_per_token": 4e-07, + "supports_tool_choice": true + }, + "vertex_ai/meta/llama-3.1-405b-instruct-maas": { + "input_cost_per_token": 5e-06, + "litellm_provider": "vertex_ai-llama_models", + "max_input_tokens": 128000, + "max_output_tokens": 2048, + "max_tokens": 2048, + "mode": "chat", + "output_cost_per_token": 1.6e-05, + "source": "https://console.cloud.google.com/vertex-ai/publishers/meta/model-garden/llama-3.2-90b-vision-instruct-maas", + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "vertex_ai/meta/llama-3.1-70b-instruct-maas": { + "input_cost_per_token": 0.0, + "litellm_provider": "vertex_ai-llama_models", + "max_input_tokens": 128000, + "max_output_tokens": 2048, + "max_tokens": 2048, + "mode": "chat", + "output_cost_per_token": 0.0, + "source": 
"https://console.cloud.google.com/vertex-ai/publishers/meta/model-garden/llama-3.2-90b-vision-instruct-maas", + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "vertex_ai/meta/llama-3.1-8b-instruct-maas": { + "input_cost_per_token": 0.0, + "litellm_provider": "vertex_ai-llama_models", + "max_input_tokens": 128000, + "max_output_tokens": 2048, + "max_tokens": 2048, + "metadata": { + "notes": "VertexAI states that The Llama 3.1 API service for llama-3.1-70b-instruct-maas and llama-3.1-8b-instruct-maas are in public preview and at no cost." + }, + "mode": "chat", + "output_cost_per_token": 0.0, + "source": "https://console.cloud.google.com/vertex-ai/publishers/meta/model-garden/llama-3.2-90b-vision-instruct-maas", + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "vertex_ai/meta/llama-3.2-90b-vision-instruct-maas": { + "input_cost_per_token": 0.0, + "litellm_provider": "vertex_ai-llama_models", + "max_input_tokens": 128000, + "max_output_tokens": 2048, + "max_tokens": 2048, + "metadata": { + "notes": "VertexAI states that The Llama 3.2 API service is at no cost during public preview, and will be priced as per dollar-per-1M-tokens at GA." 
+ }, + "mode": "chat", + "output_cost_per_token": 0.0, + "source": "https://console.cloud.google.com/vertex-ai/publishers/meta/model-garden/llama-3.2-90b-vision-instruct-maas", + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "vertex_ai/meta/llama-4-maverick-17b-128e-instruct-maas": { + "input_cost_per_token": 3.5e-07, + "litellm_provider": "vertex_ai-llama_models", + "max_input_tokens": 1000000, + "max_output_tokens": 1000000, + "max_tokens": 1000000, + "mode": "chat", + "output_cost_per_token": 1.15e-06, + "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#partner-models", + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text", + "code" + ], + "supports_function_calling": true, + "supports_tool_choice": true + }, + "vertex_ai/meta/llama-4-maverick-17b-16e-instruct-maas": { + "input_cost_per_token": 3.5e-07, + "litellm_provider": "vertex_ai-llama_models", + "max_input_tokens": 1000000, + "max_output_tokens": 1000000, + "max_tokens": 1000000, + "mode": "chat", + "output_cost_per_token": 1.15e-06, + "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#partner-models", + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text", + "code" + ], + "supports_function_calling": true, + "supports_tool_choice": true + }, + "vertex_ai/meta/llama-4-scout-17b-128e-instruct-maas": { + "input_cost_per_token": 2.5e-07, + "litellm_provider": "vertex_ai-llama_models", + "max_input_tokens": 10000000, + "max_output_tokens": 10000000, + "max_tokens": 10000000, + "mode": "chat", + "output_cost_per_token": 7e-07, + "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#partner-models", + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text", + "code" + ], + "supports_function_calling": true, + "supports_tool_choice": true + }, + 
"vertex_ai/meta/llama-4-scout-17b-16e-instruct-maas": { + "input_cost_per_token": 2.5e-07, + "litellm_provider": "vertex_ai-llama_models", + "max_input_tokens": 10000000, + "max_output_tokens": 10000000, + "max_tokens": 10000000, + "mode": "chat", + "output_cost_per_token": 7e-07, + "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#partner-models", + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text", + "code" + ], + "supports_function_calling": true, + "supports_tool_choice": true + }, + "vertex_ai/meta/llama3-405b-instruct-maas": { + "input_cost_per_token": 0.0, + "litellm_provider": "vertex_ai-llama_models", + "max_input_tokens": 32000, + "max_output_tokens": 32000, + "max_tokens": 32000, + "mode": "chat", + "output_cost_per_token": 0.0, + "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#partner-models", + "supports_tool_choice": true + }, + "vertex_ai/meta/llama3-70b-instruct-maas": { + "input_cost_per_token": 0.0, + "litellm_provider": "vertex_ai-llama_models", + "max_input_tokens": 32000, + "max_output_tokens": 32000, + "max_tokens": 32000, + "mode": "chat", + "output_cost_per_token": 0.0, + "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#partner-models", + "supports_tool_choice": true + }, + "vertex_ai/meta/llama3-8b-instruct-maas": { + "input_cost_per_token": 0.0, + "litellm_provider": "vertex_ai-llama_models", + "max_input_tokens": 32000, + "max_output_tokens": 32000, + "max_tokens": 32000, + "mode": "chat", + "output_cost_per_token": 0.0, + "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#partner-models", + "supports_tool_choice": true + }, + "vertex_ai/minimaxai/minimax-m2-maas": { + "input_cost_per_token": 3e-07, + "litellm_provider": "vertex_ai-minimax_models", + "max_input_tokens": 196608, + "max_output_tokens": 196608, + "max_tokens": 196608, + "mode": "chat", + "output_cost_per_token": 1.2e-06, + "source": 
"https://cloud.google.com/vertex-ai/generative-ai/pricing#partner-models", + "supports_function_calling": true, + "supports_tool_choice": true + }, + "vertex_ai/moonshotai/kimi-k2-thinking-maas": { + "input_cost_per_token": 6e-07, + "litellm_provider": "vertex_ai-moonshot_models", + "max_input_tokens": 256000, + "max_output_tokens": 256000, + "max_tokens": 256000, + "mode": "chat", + "output_cost_per_token": 2.5e-06, + "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#partner-models", + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_web_search": true + }, + "vertex_ai/zai-org/glm-4.7-maas": { + "input_cost_per_token": 6e-07, + "litellm_provider": "vertex_ai-zai_models", + "max_input_tokens": 200000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 2.2e-06, + "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#partner-models", + "supported_regions": [ + "global" + ], + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, + "vertex_ai/zai-org/glm-5-maas": { + "cache_read_input_token_cost": 1e-07, + "input_cost_per_token": 1e-06, + "litellm_provider": "vertex_ai-zai_models", + "max_input_tokens": 200000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 3.2e-06, + "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#glm-models", + "supported_regions": [ + "global" + ], + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, + "vertex_ai/mistral-medium-3": { + "input_cost_per_token": 4e-07, + "litellm_provider": "vertex_ai-mistral_models", + "max_input_tokens": 128000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_token": 2e-06, + "supports_function_calling": true, + "supports_tool_choice": true + }, + 
"vertex_ai/mistral-medium-3@001": { + "input_cost_per_token": 4e-07, + "litellm_provider": "vertex_ai-mistral_models", + "max_input_tokens": 128000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_token": 2e-06, + "supports_function_calling": true, + "supports_tool_choice": true + }, + "vertex_ai/mistralai/mistral-medium-3": { + "input_cost_per_token": 4e-07, + "litellm_provider": "vertex_ai-mistral_models", + "max_input_tokens": 128000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_token": 2e-06, + "supports_function_calling": true, + "supports_tool_choice": true + }, + "vertex_ai/mistralai/mistral-medium-3@001": { + "input_cost_per_token": 4e-07, + "litellm_provider": "vertex_ai-mistral_models", + "max_input_tokens": 128000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_token": 2e-06, + "supports_function_calling": true, + "supports_tool_choice": true + }, + "vertex_ai/mistral-large-2411": { + "input_cost_per_token": 2e-06, + "litellm_provider": "vertex_ai-mistral_models", + "max_input_tokens": 128000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_token": 6e-06, + "supports_function_calling": true, + "supports_tool_choice": true + }, + "vertex_ai/mistral-large@2407": { + "input_cost_per_token": 2e-06, + "litellm_provider": "vertex_ai-mistral_models", + "max_input_tokens": 128000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_token": 6e-06, + "supports_function_calling": true, + "supports_tool_choice": true + }, + "vertex_ai/mistral-large@2411-001": { + "input_cost_per_token": 2e-06, + "litellm_provider": "vertex_ai-mistral_models", + "max_input_tokens": 128000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_token": 6e-06, + "supports_function_calling": true, + "supports_tool_choice": true + }, + 
"vertex_ai/mistral-large@latest": { + "input_cost_per_token": 2e-06, + "litellm_provider": "vertex_ai-mistral_models", + "max_input_tokens": 128000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_token": 6e-06, + "supports_function_calling": true, + "supports_tool_choice": true + }, + "vertex_ai/mistral-nemo@2407": { + "input_cost_per_token": 3e-06, + "litellm_provider": "vertex_ai-mistral_models", + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 3e-06, + "supports_function_calling": true, + "supports_tool_choice": true + }, + "vertex_ai/mistral-nemo@latest": { + "input_cost_per_token": 1.5e-07, + "litellm_provider": "vertex_ai-mistral_models", + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 1.5e-07, + "supports_function_calling": true, + "supports_tool_choice": true + }, + "vertex_ai/mistral-small-2503": { + "input_cost_per_token": 1e-06, + "litellm_provider": "vertex_ai-mistral_models", + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 3e-06, + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "vertex_ai/mistral-small-2503@001": { + "input_cost_per_token": 1e-06, + "litellm_provider": "vertex_ai-mistral_models", + "max_input_tokens": 32000, + "max_output_tokens": 8191, + "max_tokens": 8191, + "mode": "chat", + "output_cost_per_token": 3e-06, + "supports_function_calling": true, + "supports_tool_choice": true + }, + "vertex_ai/mistral-ocr-2505": { + "litellm_provider": "vertex_ai", + "mode": "ocr", + "ocr_cost_per_page": 0.0005, + "supported_endpoints": [ + "/v1/ocr" + ], + "source": "https://cloud.google.com/generative-ai-app-builder/pricing" + }, + "vertex_ai/deepseek-ai/deepseek-ocr-maas": { + "litellm_provider": "vertex_ai", + 
"mode": "ocr", + "input_cost_per_token": 3e-07, + "output_cost_per_token": 1.2e-06, + "ocr_cost_per_page": 0.0003, + "source": "https://cloud.google.com/vertex-ai/pricing", + "supported_regions": [ + "us-central1" + ] + }, + "vertex_ai/openai/gpt-oss-120b-maas": { + "input_cost_per_token": 1.5e-07, + "litellm_provider": "vertex_ai-openai_models", + "max_input_tokens": 131072, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 6e-07, + "source": "https://console.cloud.google.com/vertex-ai/publishers/openai/model-garden/gpt-oss-120b-maas", + "supports_reasoning": true + }, + "vertex_ai/openai/gpt-oss-20b-maas": { + "input_cost_per_token": 7.5e-08, + "litellm_provider": "vertex_ai-openai_models", + "max_input_tokens": 131072, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 3e-07, + "source": "https://console.cloud.google.com/vertex-ai/publishers/openai/model-garden/gpt-oss-120b-maas", + "supports_reasoning": true + }, + "vertex_ai/xai/grok-4.1-fast-non-reasoning": { + "cache_read_input_token_cost": 5e-08, + "input_cost_per_token": 2e-07, + "litellm_provider": "vertex_ai", + "max_input_tokens": 2000000, + "max_output_tokens": 2000000, + "max_tokens": 2000000, + "mode": "chat", + "output_cost_per_token": 5e-07, + "source": "https://docs.x.ai/docs/models (Vertex AI Model Garden)", + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_web_search": true + }, + "vertex_ai/xai/grok-4.1-fast-reasoning": { + "cache_read_input_token_cost": 5e-08, + "input_cost_per_token": 2e-07, + "litellm_provider": "vertex_ai", + "max_input_tokens": 2000000, + "max_output_tokens": 2000000, + "max_tokens": 2000000, + "mode": "chat", + "output_cost_per_token": 5e-07, + "source": "https://docs.x.ai/docs/models (Vertex AI Model Garden)", + "supports_function_calling": true, + "supports_reasoning": true, + 
"supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_web_search": true + }, + "vertex_ai/xai/grok-4.20-non-reasoning": { + "cache_read_input_token_cost": 2e-07, + "input_cost_per_token": 2e-06, + "litellm_provider": "vertex_ai", + "max_input_tokens": 2000000, + "max_output_tokens": 2000000, + "max_tokens": 2000000, + "mode": "chat", + "output_cost_per_token": 6e-06, + "source": "https://docs.x.ai/docs/models (Vertex AI Model Garden)", + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_web_search": true + }, + "vertex_ai/xai/grok-4.20-reasoning": { + "cache_read_input_token_cost": 2e-07, + "input_cost_per_token": 2e-06, + "litellm_provider": "vertex_ai", + "max_input_tokens": 2000000, + "max_output_tokens": 2000000, + "max_tokens": 2000000, + "mode": "chat", + "output_cost_per_token": 6e-06, + "source": "https://docs.x.ai/docs/models (Vertex AI Model Garden)", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_web_search": true + }, + "vertex_ai/qwen/qwen3-235b-a22b-instruct-2507-maas": { + "input_cost_per_token": 2.5e-07, + "litellm_provider": "vertex_ai-qwen_models", + "max_input_tokens": 262144, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 1e-06, + "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing", + "supported_regions": [ + "global", + "us-south1" + ], + "supports_function_calling": true, + "supports_tool_choice": true + }, + "vertex_ai/qwen/qwen3-coder-480b-a35b-instruct-maas": { + "input_cost_per_token": 1e-06, + "litellm_provider": "vertex_ai-qwen_models", + "max_input_tokens": 262144, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 4e-06, + "source": 
"https://cloud.google.com/vertex-ai/generative-ai/pricing", + "supported_regions": [ + "global" + ], + "supports_function_calling": true, + "supports_tool_choice": true + }, + "vertex_ai/qwen/qwen3-next-80b-a3b-instruct-maas": { + "input_cost_per_token": 1.5e-07, + "litellm_provider": "vertex_ai-qwen_models", + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "max_tokens": 262144, + "mode": "chat", + "output_cost_per_token": 1.2e-06, + "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing", + "supported_regions": [ + "global" + ], + "supports_function_calling": true, + "supports_tool_choice": true + }, + "vertex_ai/qwen/qwen3-next-80b-a3b-thinking-maas": { + "input_cost_per_token": 1.5e-07, + "litellm_provider": "vertex_ai-qwen_models", + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "max_tokens": 262144, + "mode": "chat", + "output_cost_per_token": 1.2e-06, + "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing", + "supported_regions": [ + "global" + ], + "supports_function_calling": true, + "supports_tool_choice": true + }, + "vertex_ai/veo-2.0-generate-001": { + "litellm_provider": "vertex_ai-video-models", + "max_input_tokens": 1024, + "max_tokens": 1024, + "mode": "video_generation", + "output_cost_per_second": 0.35, + "source": "https://ai.google.dev/gemini-api/docs/video", + "supported_modalities": [ + "text" + ], + "supported_output_modalities": [ + "video" + ] + }, + "vertex_ai/veo-3.0-fast-generate-001": { + "litellm_provider": "vertex_ai-video-models", + "max_input_tokens": 1024, + "max_tokens": 1024, + "mode": "video_generation", + "output_cost_per_second": 0.15, + "source": "https://ai.google.dev/gemini-api/docs/video", + "supported_modalities": [ + "text" + ], + "supported_output_modalities": [ + "video" + ] + }, + "vertex_ai/veo-3.0-generate-001": { + "litellm_provider": "vertex_ai-video-models", + "max_input_tokens": 1024, + "max_tokens": 1024, + "mode": "video_generation", + 
"output_cost_per_second": 0.4, + "source": "https://ai.google.dev/gemini-api/docs/video", + "supported_modalities": [ + "text" + ], + "supported_output_modalities": [ + "video" + ] + }, + "vertex_ai/veo-3.1-generate-preview": { + "litellm_provider": "vertex_ai-video-models", + "max_input_tokens": 1024, + "max_tokens": 1024, + "mode": "video_generation", + "output_cost_per_second": 0.4, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/veo", + "supported_modalities": [ + "text" + ], + "supported_output_modalities": [ + "video" + ] + }, + "vertex_ai/veo-3.1-fast-generate-preview": { + "litellm_provider": "vertex_ai-video-models", + "max_input_tokens": 1024, + "max_tokens": 1024, + "mode": "video_generation", + "output_cost_per_second": 0.15, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/veo", + "supported_modalities": [ + "text" + ], + "supported_output_modalities": [ + "video" + ] + }, + "vertex_ai/veo-3.1-generate-001": { + "litellm_provider": "vertex_ai-video-models", + "max_input_tokens": 1024, + "max_tokens": 1024, + "mode": "video_generation", + "output_cost_per_second": 0.4, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/veo", + "supported_modalities": [ + "text" + ], + "supported_output_modalities": [ + "video" + ] + }, + "vertex_ai/veo-3.1-fast-generate-001": { + "litellm_provider": "vertex_ai-video-models", + "max_input_tokens": 1024, + "max_tokens": 1024, + "mode": "video_generation", + "output_cost_per_second": 0.15, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/veo", + "supported_modalities": [ + "text" + ], + "supported_output_modalities": [ + "video" + ] + }, + "voyage/rerank-2": { + "input_cost_per_token": 5e-08, + "litellm_provider": "voyage", + "max_input_tokens": 16000, + "max_output_tokens": 16000, + "max_query_tokens": 16000, + "max_tokens": 16000, + "mode": "rerank", + "output_cost_per_token": 0.0 + }, + 
"voyage/rerank-2-lite": { + "input_cost_per_token": 2e-08, + "litellm_provider": "voyage", + "max_input_tokens": 8000, + "max_output_tokens": 8000, + "max_query_tokens": 8000, + "max_tokens": 8000, + "mode": "rerank", + "output_cost_per_token": 0.0 + }, + "voyage/rerank-2.5": { + "input_cost_per_token": 5e-08, + "litellm_provider": "voyage", + "max_input_tokens": 32000, + "max_output_tokens": 32000, + "max_query_tokens": 32000, + "max_tokens": 32000, + "mode": "rerank", + "output_cost_per_token": 0.0 + }, + "voyage/rerank-2.5-lite": { + "input_cost_per_token": 2e-08, + "litellm_provider": "voyage", + "max_input_tokens": 32000, + "max_output_tokens": 32000, + "max_query_tokens": 32000, + "max_tokens": 32000, + "mode": "rerank", + "output_cost_per_token": 0.0 + }, + "voyage/voyage-2": { + "input_cost_per_token": 1e-07, + "litellm_provider": "voyage", + "max_input_tokens": 4000, + "max_tokens": 4000, + "mode": "embedding", + "output_cost_per_token": 0.0 + }, + "voyage/voyage-3": { + "input_cost_per_token": 6e-08, + "litellm_provider": "voyage", + "max_input_tokens": 32000, + "max_tokens": 32000, + "mode": "embedding", + "output_cost_per_token": 0.0 + }, + "voyage/voyage-3-large": { + "input_cost_per_token": 1.8e-07, + "litellm_provider": "voyage", + "max_input_tokens": 32000, + "max_tokens": 32000, + "mode": "embedding", + "output_cost_per_token": 0.0 + }, + "voyage/voyage-3-lite": { + "input_cost_per_token": 2e-08, + "litellm_provider": "voyage", + "max_input_tokens": 32000, + "max_tokens": 32000, + "mode": "embedding", + "output_cost_per_token": 0.0 + }, + "voyage/voyage-3.5": { + "input_cost_per_token": 6e-08, + "litellm_provider": "voyage", + "max_input_tokens": 32000, + "max_tokens": 32000, + "mode": "embedding", + "output_cost_per_token": 0.0 + }, + "voyage/voyage-3.5-lite": { + "input_cost_per_token": 2e-08, + "litellm_provider": "voyage", + "max_input_tokens": 32000, + "max_tokens": 32000, + "mode": "embedding", + "output_cost_per_token": 0.0 + }, + 
"voyage/voyage-code-2": { + "input_cost_per_token": 1.2e-07, + "litellm_provider": "voyage", + "max_input_tokens": 16000, + "max_tokens": 16000, + "mode": "embedding", + "output_cost_per_token": 0.0 + }, + "voyage/voyage-code-3": { + "input_cost_per_token": 1.8e-07, + "litellm_provider": "voyage", + "max_input_tokens": 32000, + "max_tokens": 32000, + "mode": "embedding", + "output_cost_per_token": 0.0 + }, + "voyage/voyage-context-3": { + "input_cost_per_token": 1.8e-07, + "litellm_provider": "voyage", + "max_input_tokens": 120000, + "max_tokens": 120000, + "mode": "embedding", + "output_cost_per_token": 0.0 + }, + "voyage/voyage-finance-2": { + "input_cost_per_token": 1.2e-07, + "litellm_provider": "voyage", + "max_input_tokens": 32000, + "max_tokens": 32000, + "mode": "embedding", + "output_cost_per_token": 0.0 + }, + "voyage/voyage-large-2": { + "input_cost_per_token": 1.2e-07, + "litellm_provider": "voyage", + "max_input_tokens": 16000, + "max_tokens": 16000, + "mode": "embedding", + "output_cost_per_token": 0.0 + }, + "voyage/voyage-law-2": { + "input_cost_per_token": 1.2e-07, + "litellm_provider": "voyage", + "max_input_tokens": 16000, + "max_tokens": 16000, + "mode": "embedding", + "output_cost_per_token": 0.0 + }, + "voyage/voyage-lite-01": { + "input_cost_per_token": 1e-07, + "litellm_provider": "voyage", + "max_input_tokens": 4096, + "max_tokens": 4096, + "mode": "embedding", + "output_cost_per_token": 0.0 + }, + "voyage/voyage-lite-02-instruct": { + "input_cost_per_token": 1e-07, + "litellm_provider": "voyage", + "max_input_tokens": 4000, + "max_tokens": 4000, + "mode": "embedding", + "output_cost_per_token": 0.0 + }, + "voyage/voyage-multimodal-3": { + "input_cost_per_token": 1.2e-07, + "litellm_provider": "voyage", + "max_input_tokens": 32000, + "max_tokens": 32000, + "mode": "embedding", + "output_cost_per_token": 0.0 + }, + "wandb/openai/gpt-oss-120b": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + 
"input_cost_per_token": 0.015, + "output_cost_per_token": 0.06, + "litellm_provider": "wandb", + "mode": "chat" + }, + "wandb/openai/gpt-oss-20b": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 0.005, + "output_cost_per_token": 0.02, + "litellm_provider": "wandb", + "mode": "chat" + }, + "wandb/zai-org/GLM-4.5": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 0.055, + "output_cost_per_token": 0.2, + "litellm_provider": "wandb", + "mode": "chat" + }, + "wandb/Qwen/Qwen3-235B-A22B-Instruct-2507": { + "max_tokens": 262144, + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "input_cost_per_token": 0.01, + "output_cost_per_token": 0.01, + "litellm_provider": "wandb", + "mode": "chat" + }, + "wandb/Qwen/Qwen3-Coder-480B-A35B-Instruct": { + "max_tokens": 262144, + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "input_cost_per_token": 0.1, + "output_cost_per_token": 0.15, + "litellm_provider": "wandb", + "mode": "chat" + }, + "wandb/Qwen/Qwen3-235B-A22B-Thinking-2507": { + "max_tokens": 262144, + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "input_cost_per_token": 0.01, + "output_cost_per_token": 0.01, + "litellm_provider": "wandb", + "mode": "chat" + }, + "wandb/moonshotai/Kimi-K2-Instruct": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "input_cost_per_token": 6e-07, + "output_cost_per_token": 2.5e-06, + "litellm_provider": "wandb", + "mode": "chat" + }, + "wandb/moonshotai/Kimi-K2.5": { + "max_tokens": 262144, + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "cache_read_input_token_cost": 1e-07, + "input_cost_per_token": 6e-07, + "output_cost_per_token": 3e-06, + "litellm_provider": "wandb", + "mode": "chat", + "source": "https://wandb.ai/inference/coreweave/cw_moonshotai_Kimi-K2.5", + "supports_function_calling": true, + "supports_reasoning": 
true, + "supports_response_schema": true, + "supports_vision": true + }, + "wandb/MiniMaxAI/MiniMax-M2.5": { + "max_tokens": 197000, + "max_input_tokens": 197000, + "max_output_tokens": 197000, + "input_cost_per_token": 3e-07, + "output_cost_per_token": 1.2e-06, + "litellm_provider": "wandb", + "mode": "chat", + "source": "https://wandb.ai/inference/coreweave/cw_MiniMaxAI_MiniMax-M2.5", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_response_schema": true + }, + "wandb/meta-llama/Llama-3.1-8B-Instruct": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "input_cost_per_token": 0.022, + "output_cost_per_token": 0.022, + "litellm_provider": "wandb", + "mode": "chat" + }, + "wandb/deepseek-ai/DeepSeek-V3.1": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "input_cost_per_token": 0.055, + "output_cost_per_token": 0.165, + "litellm_provider": "wandb", + "mode": "chat" + }, + "wandb/deepseek-ai/DeepSeek-R1-0528": { + "max_tokens": 161000, + "max_input_tokens": 161000, + "max_output_tokens": 161000, + "input_cost_per_token": 0.135, + "output_cost_per_token": 0.54, + "litellm_provider": "wandb", + "mode": "chat" + }, + "wandb/deepseek-ai/DeepSeek-V3-0324": { + "max_tokens": 161000, + "max_input_tokens": 161000, + "max_output_tokens": 161000, + "input_cost_per_token": 0.114, + "output_cost_per_token": 0.275, + "litellm_provider": "wandb", + "mode": "chat" + }, + "wandb/meta-llama/Llama-3.3-70B-Instruct": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "input_cost_per_token": 0.071, + "output_cost_per_token": 0.071, + "litellm_provider": "wandb", + "mode": "chat" + }, + "wandb/meta-llama/Llama-4-Scout-17B-16E-Instruct": { + "max_tokens": 64000, + "max_input_tokens": 64000, + "max_output_tokens": 64000, + "input_cost_per_token": 0.017, + "output_cost_per_token": 0.066, + "litellm_provider": "wandb", + "mode": "chat" + }, + 
"wandb/microsoft/Phi-4-mini-instruct": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "input_cost_per_token": 0.008, + "output_cost_per_token": 0.035, + "litellm_provider": "wandb", + "mode": "chat" + }, + "watsonx/ibm/granite-3-8b-instruct": { + "input_cost_per_token": 2e-07, + "litellm_provider": "watsonx", + "max_input_tokens": 8192, + "max_output_tokens": 1024, + "max_tokens": 1024, + "mode": "chat", + "output_cost_per_token": 2e-07, + "supports_audio_input": false, + "supports_audio_output": false, + "supports_function_calling": true, + "supports_parallel_function_calling": false, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": false + }, + "watsonx/mistralai/mistral-large": { + "input_cost_per_token": 3e-06, + "litellm_provider": "watsonx", + "max_input_tokens": 131072, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 1e-05, + "supports_audio_input": false, + "supports_audio_output": false, + "supports_function_calling": true, + "supports_parallel_function_calling": false, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": false + }, + "watsonx/bigscience/mt0-xxl-13b": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 0.0005, + "output_cost_per_token": 0.002, + "litellm_provider": "watsonx", + "mode": "chat", + "supports_function_calling": false, + "supports_parallel_function_calling": false, + "supports_vision": false + }, + "watsonx/core42/jais-13b-chat": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 0.0005, + "output_cost_per_token": 0.002, + "litellm_provider": "watsonx", + "mode": "chat", + "supports_function_calling": false, + 
"supports_parallel_function_calling": false, + "supports_vision": false + }, + "watsonx/google/flan-t5-xl-3b": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 6e-07, + "output_cost_per_token": 6e-07, + "litellm_provider": "watsonx", + "mode": "chat", + "supports_function_calling": false, + "supports_parallel_function_calling": false, + "supports_vision": false + }, + "watsonx/ibm/granite-13b-chat-v2": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 6e-07, + "output_cost_per_token": 6e-07, + "litellm_provider": "watsonx", + "mode": "chat", + "supports_function_calling": false, + "supports_parallel_function_calling": false, + "supports_vision": false + }, + "watsonx/ibm/granite-13b-instruct-v2": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 6e-07, + "output_cost_per_token": 6e-07, + "litellm_provider": "watsonx", + "mode": "chat", + "supports_function_calling": false, + "supports_parallel_function_calling": false, + "supports_vision": false + }, + "watsonx/ibm/granite-3-3-8b-instruct": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 2e-07, + "output_cost_per_token": 2e-07, + "litellm_provider": "watsonx", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": false + }, + "watsonx/ibm/granite-4-h-small": { + "max_tokens": 20480, + "max_input_tokens": 20480, + "max_output_tokens": 20480, + "input_cost_per_token": 6e-08, + "output_cost_per_token": 2.5e-07, + "litellm_provider": "watsonx", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": false + }, + "watsonx/ibm/granite-guardian-3-2-2b": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 
1e-07, + "output_cost_per_token": 1e-07, + "litellm_provider": "watsonx", + "mode": "chat", + "supports_function_calling": false, + "supports_parallel_function_calling": false, + "supports_vision": false + }, + "watsonx/ibm/granite-guardian-3-3-8b": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 2e-07, + "output_cost_per_token": 2e-07, + "litellm_provider": "watsonx", + "mode": "chat", + "supports_function_calling": false, + "supports_parallel_function_calling": false, + "supports_vision": false + }, + "watsonx/ibm/granite-ttm-1024-96-r2": { + "max_tokens": 512, + "max_input_tokens": 512, + "max_output_tokens": 512, + "input_cost_per_token": 3.8e-07, + "output_cost_per_token": 3.8e-07, + "litellm_provider": "watsonx", + "mode": "chat", + "supports_function_calling": false, + "supports_parallel_function_calling": false, + "supports_vision": false + }, + "watsonx/ibm/granite-ttm-1536-96-r2": { + "max_tokens": 512, + "max_input_tokens": 512, + "max_output_tokens": 512, + "input_cost_per_token": 3.8e-07, + "output_cost_per_token": 3.8e-07, + "litellm_provider": "watsonx", + "mode": "chat", + "supports_function_calling": false, + "supports_parallel_function_calling": false, + "supports_vision": false + }, + "watsonx/ibm/granite-ttm-512-96-r2": { + "max_tokens": 512, + "max_input_tokens": 512, + "max_output_tokens": 512, + "input_cost_per_token": 3.8e-07, + "output_cost_per_token": 3.8e-07, + "litellm_provider": "watsonx", + "mode": "chat", + "supports_function_calling": false, + "supports_parallel_function_calling": false, + "supports_vision": false + }, + "watsonx/ibm/granite-vision-3-2-2b": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 1e-07, + "output_cost_per_token": 1e-07, + "litellm_provider": "watsonx", + "mode": "chat", + "supports_function_calling": false, + "supports_parallel_function_calling": false, + "supports_vision": true + }, + 
"watsonx/meta-llama/llama-3-2-11b-vision-instruct": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "input_cost_per_token": 3.5e-07, + "output_cost_per_token": 3.5e-07, + "litellm_provider": "watsonx", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": true + }, + "watsonx/meta-llama/llama-3-2-1b-instruct": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "input_cost_per_token": 1e-07, + "output_cost_per_token": 1e-07, + "litellm_provider": "watsonx", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": false + }, + "watsonx/meta-llama/llama-3-2-3b-instruct": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "input_cost_per_token": 1.5e-07, + "output_cost_per_token": 1.5e-07, + "litellm_provider": "watsonx", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": false + }, + "watsonx/meta-llama/llama-3-2-90b-vision-instruct": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "input_cost_per_token": 2e-06, + "output_cost_per_token": 2e-06, + "litellm_provider": "watsonx", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": true + }, + "watsonx/meta-llama/llama-3-3-70b-instruct": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "input_cost_per_token": 7.1e-07, + "output_cost_per_token": 7.1e-07, + "litellm_provider": "watsonx", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": false + }, + "watsonx/meta-llama/llama-4-maverick-17b": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 128000, + 
"input_cost_per_token": 3.5e-07, + "output_cost_per_token": 1.4e-06, + "litellm_provider": "watsonx", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": false + }, + "watsonx/meta-llama/llama-guard-3-11b-vision": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "input_cost_per_token": 3.5e-07, + "output_cost_per_token": 3.5e-07, + "litellm_provider": "watsonx", + "mode": "chat", + "supports_function_calling": false, + "supports_parallel_function_calling": false, + "supports_vision": true + }, + "watsonx/mistralai/mistral-medium-2505": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "input_cost_per_token": 3e-06, + "output_cost_per_token": 1e-05, + "litellm_provider": "watsonx", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": false + }, + "watsonx/mistralai/mistral-small-2503": { + "max_tokens": 32000, + "max_input_tokens": 32000, + "max_output_tokens": 32000, + "input_cost_per_token": 1e-07, + "output_cost_per_token": 3e-07, + "litellm_provider": "watsonx", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": false + }, + "watsonx/mistralai/mistral-small-3-1-24b-instruct-2503": { + "max_tokens": 32000, + "max_input_tokens": 32000, + "max_output_tokens": 32000, + "input_cost_per_token": 1e-07, + "output_cost_per_token": 3e-07, + "litellm_provider": "watsonx", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": false + }, + "watsonx/mistralai/pixtral-12b-2409": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "input_cost_per_token": 3.5e-07, + "output_cost_per_token": 3.5e-07, + "litellm_provider": "watsonx", + "mode": "chat", + "supports_function_calling": false, + 
"supports_parallel_function_calling": false, + "supports_vision": true + }, + "watsonx/openai/gpt-oss-120b": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 1.5e-07, + "output_cost_per_token": 6e-07, + "litellm_provider": "watsonx", + "mode": "chat", + "supports_function_calling": false, + "supports_parallel_function_calling": false, + "supports_vision": false + }, + "watsonx/sdaia/allam-1-13b-instruct": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 1.8e-06, + "output_cost_per_token": 1.8e-06, + "litellm_provider": "watsonx", + "mode": "chat", + "supports_function_calling": false, + "supports_parallel_function_calling": false, + "supports_vision": false + }, + "watsonx/whisper-large-v3-turbo": { + "input_cost_per_second": 0.0001, + "output_cost_per_second": 0.0001, + "litellm_provider": "watsonx", + "mode": "audio_transcription", + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] + }, + "whisper-1": { + "input_cost_per_second": 0.0001, + "litellm_provider": "openai", + "mode": "audio_transcription", + "output_cost_per_second": 0.0001, + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] + }, + "xai/grok-2": { + "input_cost_per_token": 2e-06, + "litellm_provider": "xai", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 1e-05, + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_web_search": true + }, + "xai/grok-2-1212": { + "input_cost_per_token": 2e-06, + "litellm_provider": "xai", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 1e-05, + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_web_search": true + }, + "xai/grok-2-latest": { + "input_cost_per_token": 2e-06, + "litellm_provider": "xai", + 
"max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 1e-05, + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_web_search": true + }, + "xai/grok-2-vision": { + "input_cost_per_image": 2e-06, + "input_cost_per_token": 2e-06, + "litellm_provider": "xai", + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 1e-05, + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_web_search": true + }, + "xai/grok-2-vision-1212": { + "deprecation_date": "2026-02-28", + "input_cost_per_image": 2e-06, + "input_cost_per_token": 2e-06, + "litellm_provider": "xai", + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 1e-05, + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_web_search": true + }, + "xai/grok-2-vision-latest": { + "input_cost_per_image": 2e-06, + "input_cost_per_token": 2e-06, + "litellm_provider": "xai", + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 1e-05, + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_web_search": true + }, + "xai/grok-3": { + "cache_read_input_token_cost": 7.5e-07, + "input_cost_per_token": 3e-06, + "litellm_provider": "xai", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 1.5e-05, + "source": "https://x.ai/api#pricing", + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_response_schema": false, + "supports_tool_choice": true, + "supports_web_search": true + }, + "xai/grok-3-beta": { + "cache_read_input_token_cost": 7.5e-07, + 
"input_cost_per_token": 3e-06, + "litellm_provider": "xai", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 1.5e-05, + "source": "https://x.ai/api#pricing", + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_response_schema": false, + "supports_tool_choice": true, + "supports_web_search": true + }, + "xai/grok-3-fast-beta": { + "cache_read_input_token_cost": 1.25e-06, + "input_cost_per_token": 5e-06, + "litellm_provider": "xai", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 2.5e-05, + "source": "https://x.ai/api#pricing", + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_response_schema": false, + "supports_tool_choice": true, + "supports_web_search": true + }, + "xai/grok-3-fast-latest": { + "cache_read_input_token_cost": 1.25e-06, + "input_cost_per_token": 5e-06, + "litellm_provider": "xai", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 2.5e-05, + "source": "https://x.ai/api#pricing", + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_response_schema": false, + "supports_tool_choice": true, + "supports_web_search": true + }, + "xai/grok-3-latest": { + "cache_read_input_token_cost": 7.5e-07, + "input_cost_per_token": 3e-06, + "litellm_provider": "xai", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 1.5e-05, + "source": "https://x.ai/api#pricing", + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_response_schema": false, + "supports_tool_choice": true, + "supports_web_search": true + }, + "xai/grok-3-mini": { + "cache_read_input_token_cost": 7.5e-08, + "deprecation_date": "2026-02-28", + 
"input_cost_per_token": 3e-07, + "litellm_provider": "xai", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 5e-07, + "source": "https://x.ai/api#pricing", + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": false, + "supports_tool_choice": true, + "supports_web_search": true + }, + "xai/grok-3-mini-beta": { + "cache_read_input_token_cost": 7.5e-08, + "deprecation_date": "2026-02-28", + "input_cost_per_token": 3e-07, + "litellm_provider": "xai", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 5e-07, + "source": "https://x.ai/api#pricing", + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": false, + "supports_tool_choice": true, + "supports_web_search": true + }, + "xai/grok-3-mini-fast": { + "cache_read_input_token_cost": 1.5e-07, + "input_cost_per_token": 6e-07, + "litellm_provider": "xai", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 4e-06, + "source": "https://x.ai/api#pricing", + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": false, + "supports_tool_choice": true, + "supports_web_search": true + }, + "xai/grok-3-mini-fast-beta": { + "cache_read_input_token_cost": 1.5e-07, + "input_cost_per_token": 6e-07, + "litellm_provider": "xai", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 4e-06, + "source": "https://x.ai/api#pricing", + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": false, + "supports_tool_choice": true, + 
"supports_web_search": true + }, + "xai/grok-3-mini-fast-latest": { + "cache_read_input_token_cost": 1.5e-07, + "input_cost_per_token": 6e-07, + "litellm_provider": "xai", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 4e-06, + "source": "https://x.ai/api#pricing", + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": false, + "supports_tool_choice": true, + "supports_web_search": true + }, + "xai/grok-3-mini-latest": { + "cache_read_input_token_cost": 7.5e-08, + "input_cost_per_token": 3e-07, + "litellm_provider": "xai", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 5e-07, + "source": "https://x.ai/api#pricing", + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": false, + "supports_tool_choice": true, + "supports_web_search": true + }, + "xai/grok-4": { + "input_cost_per_token": 3e-06, + "litellm_provider": "xai", + "max_input_tokens": 256000, + "max_output_tokens": 256000, + "max_tokens": 256000, + "mode": "chat", + "output_cost_per_token": 1.5e-05, + "source": "https://docs.x.ai/docs/models", + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_tool_choice": true, + "supports_web_search": true + }, + "xai/grok-4-fast-reasoning": { + "cache_read_input_token_cost": 5e-08, + "input_cost_per_token": 2e-07, + "input_cost_per_token_above_128k_tokens": 4e-07, + "litellm_provider": "xai", + "max_input_tokens": 2000000.0, + "max_output_tokens": 2000000.0, + "max_tokens": 2000000.0, + "mode": "chat", + "output_cost_per_token": 5e-07, + "output_cost_per_token_above_128k_tokens": 1e-06, + "source": "https://docs.x.ai/docs/models", + "supports_function_calling": true, + "supports_prompt_caching": true, + 
"supports_tool_choice": true, + "supports_web_search": true + }, + "xai/grok-4-fast-non-reasoning": { + "cache_read_input_token_cost": 5e-08, + "input_cost_per_token": 2e-07, + "input_cost_per_token_above_128k_tokens": 4e-07, + "litellm_provider": "xai", + "max_input_tokens": 2000000.0, + "max_output_tokens": 2000000.0, + "max_tokens": 2000000.0, + "mode": "chat", + "output_cost_per_token": 5e-07, + "output_cost_per_token_above_128k_tokens": 1e-06, + "source": "https://docs.x.ai/docs/models", + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_tool_choice": true, + "supports_web_search": true + }, + "xai/grok-4-0709": { + "input_cost_per_token": 3e-06, + "input_cost_per_token_above_128k_tokens": 6e-06, + "litellm_provider": "xai", + "max_input_tokens": 256000, + "max_output_tokens": 256000, + "max_tokens": 256000, + "mode": "chat", + "output_cost_per_token": 1.5e-05, + "output_cost_per_token_above_128k_tokens": 3e-05, + "source": "https://docs.x.ai/docs/models", + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_tool_choice": true, + "supports_web_search": true + }, + "xai/grok-4-latest": { + "input_cost_per_token": 3e-06, + "input_cost_per_token_above_128k_tokens": 6e-06, + "litellm_provider": "xai", + "max_input_tokens": 256000, + "max_output_tokens": 256000, + "max_tokens": 256000, + "mode": "chat", + "output_cost_per_token": 1.5e-05, + "output_cost_per_token_above_128k_tokens": 3e-05, + "source": "https://docs.x.ai/docs/models", + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_tool_choice": true, + "supports_web_search": true + }, + "xai/grok-4-1-fast": { + "cache_read_input_token_cost": 5e-08, + "input_cost_per_token": 2e-07, + "input_cost_per_token_above_128k_tokens": 4e-07, + "litellm_provider": "xai", + "max_input_tokens": 2000000.0, + "max_output_tokens": 2000000.0, + "max_tokens": 2000000.0, + "mode": "chat", + "output_cost_per_token": 5e-07, + 
"output_cost_per_token_above_128k_tokens": 1e-06, + "source": "https://docs.x.ai/docs/models/grok-4-1-fast-reasoning", + "supports_audio_input": true, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_web_search": true + }, + "xai/grok-4-1-fast-reasoning": { + "cache_read_input_token_cost": 5e-08, + "input_cost_per_token": 2e-07, + "input_cost_per_token_above_128k_tokens": 4e-07, + "litellm_provider": "xai", + "max_input_tokens": 2000000.0, + "max_output_tokens": 2000000.0, + "max_tokens": 2000000.0, + "mode": "chat", + "output_cost_per_token": 5e-07, + "output_cost_per_token_above_128k_tokens": 1e-06, + "source": "https://docs.x.ai/docs/models/grok-4-1-fast-reasoning", + "supports_audio_input": true, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_web_search": true + }, + "xai/grok-4-1-fast-reasoning-latest": { + "cache_read_input_token_cost": 5e-08, + "input_cost_per_token": 2e-07, + "input_cost_per_token_above_128k_tokens": 4e-07, + "litellm_provider": "xai", + "max_input_tokens": 2000000.0, + "max_output_tokens": 2000000.0, + "max_tokens": 2000000.0, + "mode": "chat", + "output_cost_per_token": 5e-07, + "output_cost_per_token_above_128k_tokens": 1e-06, + "source": "https://docs.x.ai/docs/models/grok-4-1-fast-reasoning", + "supports_audio_input": true, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_web_search": true + }, + "xai/grok-4-1-fast-non-reasoning": { + "cache_read_input_token_cost": 5e-08, + "input_cost_per_token": 2e-07, + "input_cost_per_token_above_128k_tokens": 4e-07, + 
"litellm_provider": "xai", + "max_input_tokens": 2000000.0, + "max_output_tokens": 2000000.0, + "max_tokens": 2000000.0, + "mode": "chat", + "output_cost_per_token": 5e-07, + "output_cost_per_token_above_128k_tokens": 1e-06, + "source": "https://docs.x.ai/docs/models/grok-4-1-fast-non-reasoning", + "supports_audio_input": true, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_web_search": true + }, + "xai/grok-4-1-fast-non-reasoning-latest": { + "cache_read_input_token_cost": 5e-08, + "input_cost_per_token": 2e-07, + "input_cost_per_token_above_128k_tokens": 4e-07, + "litellm_provider": "xai", + "max_input_tokens": 2000000.0, + "max_output_tokens": 2000000.0, + "max_tokens": 2000000.0, + "mode": "chat", + "output_cost_per_token": 5e-07, + "output_cost_per_token_above_128k_tokens": 1e-06, + "source": "https://docs.x.ai/docs/models/grok-4-1-fast-non-reasoning", + "supports_audio_input": true, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_web_search": true + }, + "xai/grok-4.20-multi-agent-beta-0309": { + "cache_read_input_token_cost": 2e-07, + "input_cost_per_token": 2e-06, + "litellm_provider": "xai", + "max_input_tokens": 2000000, + "max_output_tokens": 2000000, + "max_tokens": 2000000, + "mode": "chat", + "output_cost_per_token": 6e-06, + "source": "https://docs.x.ai/docs/models", + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_web_search": true + }, + "xai/grok-4.20-beta-0309-reasoning": { + "cache_read_input_token_cost": 2e-07, + "input_cost_per_token": 2e-06, + "litellm_provider": "xai", + "max_input_tokens": 2000000, + "max_output_tokens": 2000000, + "max_tokens": 2000000, + 
"mode": "chat", + "output_cost_per_token": 6e-06, + "source": "https://docs.x.ai/docs/models", + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_web_search": true + }, + "xai/grok-4.20-0309-reasoning": { + "cache_read_input_token_cost": 2e-07, + "input_cost_per_token": 2e-06, + "litellm_provider": "xai", + "max_input_tokens": 2000000, + "max_output_tokens": 2000000, + "max_tokens": 2000000, + "mode": "chat", + "output_cost_per_token": 6e-06, + "source": "https://docs.x.ai/docs/models", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_web_search": true + }, + "xai/grok-4.20-beta-0309-non-reasoning": { + "cache_read_input_token_cost": 2e-07, + "input_cost_per_token": 2e-06, + "litellm_provider": "xai", + "max_input_tokens": 2000000, + "max_output_tokens": 2000000, + "max_tokens": 2000000, + "mode": "chat", + "output_cost_per_token": 6e-06, + "source": "https://docs.x.ai/docs/models", + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_web_search": true + }, + "xai/grok-4.3": { + "cache_read_input_token_cost": 2e-07, + "cache_read_input_token_cost_above_200k_tokens": 4e-07, + "input_cost_per_token": 1.25e-06, + "input_cost_per_token_above_200k_tokens": 2.5e-06, + "litellm_provider": "xai", + "max_input_tokens": 1000000, + "max_output_tokens": 1000000, + "max_tokens": 1000000, + "mode": "chat", + "output_cost_per_token": 2.5e-06, + "output_cost_per_token_above_200k_tokens": 5e-06, + "source": "https://docs.x.ai/docs/models", + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_web_search": true + }, + 
"xai/grok-4.3-latest": { + "cache_read_input_token_cost": 2e-07, + "cache_read_input_token_cost_above_200k_tokens": 4e-07, + "input_cost_per_token": 1.25e-06, + "input_cost_per_token_above_200k_tokens": 2.5e-06, + "litellm_provider": "xai", + "max_input_tokens": 1000000, + "max_output_tokens": 1000000, + "max_tokens": 1000000, + "mode": "chat", + "output_cost_per_token": 2.5e-06, + "output_cost_per_token_above_200k_tokens": 5e-06, + "source": "https://docs.x.ai/docs/models", + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_web_search": true + }, + "xai/grok-beta": { + "input_cost_per_token": 5e-06, + "litellm_provider": "xai", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 1.5e-05, + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_web_search": true + }, + "xai/grok-code-fast": { + "cache_read_input_token_cost": 2e-08, + "input_cost_per_token": 2e-07, + "litellm_provider": "xai", + "max_input_tokens": 256000, + "max_output_tokens": 256000, + "max_tokens": 256000, + "mode": "chat", + "output_cost_per_token": 1.5e-06, + "source": "https://docs.x.ai/docs/models", + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, + "xai/grok-code-fast-1": { + "cache_read_input_token_cost": 2e-08, + "input_cost_per_token": 2e-07, + "litellm_provider": "xai", + "max_input_tokens": 256000, + "max_output_tokens": 256000, + "max_tokens": 256000, + "mode": "chat", + "output_cost_per_token": 1.5e-06, + "source": "https://docs.x.ai/docs/models", + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, + "xai/grok-code-fast-1-0825": { 
+ "cache_read_input_token_cost": 2e-08, + "input_cost_per_token": 2e-07, + "litellm_provider": "xai", + "max_input_tokens": 256000, + "max_output_tokens": 256000, + "max_tokens": 256000, + "mode": "chat", + "output_cost_per_token": 1.5e-06, + "source": "https://docs.x.ai/docs/models", + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, + "xai/grok-vision-beta": { + "input_cost_per_image": 5e-06, + "input_cost_per_token": 5e-06, + "litellm_provider": "xai", + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 1.5e-05, + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_web_search": true + }, + "zai.glm-4.7": { + "input_cost_per_token": 6e-07, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 200000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 2.2e-06, + "supports_function_calling": true, + "supports_reasoning": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "source": "https://aws.amazon.com/bedrock/pricing/" + }, + "zai.glm-5": { + "input_cost_per_token": 1e-06, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 200000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 3.2e-06, + "supports_function_calling": true, + "supports_reasoning": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "source": "https://aws.amazon.com/bedrock/pricing/" + }, + "zai.glm-4.7-flash": { + "input_cost_per_token": 7e-08, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 200000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 4e-07, + "supports_function_calling": true, + "supports_reasoning": true, + "supports_system_messages": 
true, + "supports_tool_choice": true, + "source": "https://aws.amazon.com/bedrock/pricing/" + }, + "zai.glm-5": { + "input_cost_per_token": 1e-06, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 200000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 3.2e-06, + "source": "https://aws.amazon.com/bedrock/pricing/", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "zai/glm-5": { + "cache_creation_input_token_cost": 0, + "cache_read_input_token_cost": 2e-07, + "input_cost_per_token": 1e-06, + "output_cost_per_token": 3.2e-06, + "litellm_provider": "zai", + "max_input_tokens": 200000, + "max_output_tokens": 128000, + "mode": "chat", + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_tool_choice": true, + "source": "https://docs.z.ai/guides/overview/pricing" + }, + "zai/glm-5-code": { + "cache_creation_input_token_cost": 0, + "cache_read_input_token_cost": 3e-07, + "input_cost_per_token": 1.2e-06, + "output_cost_per_token": 5e-06, + "litellm_provider": "zai", + "max_input_tokens": 200000, + "max_output_tokens": 128000, + "mode": "chat", + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_tool_choice": true, + "source": "https://docs.z.ai/guides/overview/pricing" + }, + "zai/glm-4.7": { + "cache_creation_input_token_cost": 0, + "cache_read_input_token_cost": 1.1e-07, + "input_cost_per_token": 6e-07, + "output_cost_per_token": 2.2e-06, + "litellm_provider": "zai", + "max_input_tokens": 200000, + "max_output_tokens": 128000, + "mode": "chat", + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_tool_choice": true, + "source": "https://docs.z.ai/guides/overview/pricing" + }, + "zai/glm-4.6": { + "cache_creation_input_token_cost": 0, + 
"cache_read_input_token_cost": 1.1e-07, + "input_cost_per_token": 6e-07, + "output_cost_per_token": 2.2e-06, + "litellm_provider": "zai", + "max_input_tokens": 200000, + "max_output_tokens": 128000, + "mode": "chat", + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_tool_choice": true, + "source": "https://docs.z.ai/guides/overview/pricing" + }, + "zai/glm-4.5": { + "input_cost_per_token": 6e-07, + "output_cost_per_token": 2.2e-06, + "litellm_provider": "zai", + "max_input_tokens": 128000, + "max_output_tokens": 32000, + "mode": "chat", + "supports_function_calling": true, + "supports_tool_choice": true, + "source": "https://docs.z.ai/guides/overview/pricing" + }, + "zai/glm-4.5v": { + "input_cost_per_token": 6e-07, + "output_cost_per_token": 1.8e-06, + "litellm_provider": "zai", + "max_input_tokens": 128000, + "max_output_tokens": 32000, + "mode": "chat", + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_vision": true, + "source": "https://docs.z.ai/guides/overview/pricing" + }, + "zai/glm-4.5-x": { + "input_cost_per_token": 2.2e-06, + "output_cost_per_token": 8.9e-06, + "litellm_provider": "zai", + "max_input_tokens": 128000, + "max_output_tokens": 32000, + "mode": "chat", + "supports_function_calling": true, + "supports_tool_choice": true, + "source": "https://docs.z.ai/guides/overview/pricing" + }, + "zai/glm-4.5-air": { + "input_cost_per_token": 2e-07, + "output_cost_per_token": 1.1e-06, + "litellm_provider": "zai", + "max_input_tokens": 128000, + "max_output_tokens": 32000, + "mode": "chat", + "supports_function_calling": true, + "supports_tool_choice": true, + "source": "https://docs.z.ai/guides/overview/pricing" + }, + "zai/glm-4.5-airx": { + "input_cost_per_token": 1.1e-06, + "output_cost_per_token": 4.5e-06, + "litellm_provider": "zai", + "max_input_tokens": 128000, + "max_output_tokens": 32000, + "mode": "chat", + "supports_function_calling": true, + 
"supports_tool_choice": true, + "source": "https://docs.z.ai/guides/overview/pricing" + }, + "zai/glm-4-32b-0414-128k": { + "input_cost_per_token": 1e-07, + "output_cost_per_token": 1e-07, + "litellm_provider": "zai", + "max_input_tokens": 128000, + "max_output_tokens": 32000, + "mode": "chat", + "supports_function_calling": true, + "supports_tool_choice": true, + "source": "https://docs.z.ai/guides/overview/pricing" + }, + "zai/glm-4.5-flash": { + "input_cost_per_token": 0, + "output_cost_per_token": 0, + "litellm_provider": "zai", + "max_input_tokens": 128000, + "max_output_tokens": 32000, + "mode": "chat", + "supports_function_calling": true, + "supports_tool_choice": true, + "source": "https://docs.z.ai/guides/overview/pricing" + }, + "vertex_ai/search_api": { + "input_cost_per_query": 0.0015, + "litellm_provider": "vertex_ai", + "mode": "vector_store" + }, + "openai/container": { + "code_interpreter_cost_per_session": 0.03, + "litellm_provider": "openai", + "mode": "chat" + }, + "openai/sora-2": { + "litellm_provider": "openai", + "mode": "video_generation", + "output_cost_per_video_per_second": 0.1, + "source": "https://platform.openai.com/docs/api-reference/videos", + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "video" + ], + "supported_resolutions": [ + "720x1280", + "1280x720" + ] + }, + "openai/sora-2-pro": { + "litellm_provider": "openai", + "mode": "video_generation", + "output_cost_per_video_per_second": 0.3, + "source": "https://platform.openai.com/docs/api-reference/videos", + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "video" + ], + "supported_resolutions": [ + "720x1280", + "1280x720" + ] + }, + "openai/sora-2-pro-high-res": { + "litellm_provider": "openai", + "mode": "video_generation", + "output_cost_per_video_per_second": 0.5, + "source": "https://platform.openai.com/docs/api-reference/videos", + "supported_modalities": [ + "text", + "image" + ], + 
"supported_output_modalities": [ + "video" + ], + "supported_resolutions": [ + "1024x1792", + "1792x1024" + ] + }, + "azure/sora-2": { + "litellm_provider": "azure", + "mode": "video_generation", + "output_cost_per_video_per_second": 0.1, + "source": "https://azure.microsoft.com/en-us/products/ai-services/video-generation", + "supported_modalities": [ + "text" + ], + "supported_output_modalities": [ + "video" + ], + "supported_resolutions": [ + "720x1280", + "1280x720" + ] + }, + "azure/sora-2-pro": { + "litellm_provider": "azure", + "mode": "video_generation", + "output_cost_per_video_per_second": 0.3, + "source": "https://azure.microsoft.com/en-us/products/ai-services/video-generation", + "supported_modalities": [ + "text" + ], + "supported_output_modalities": [ + "video" + ], + "supported_resolutions": [ + "720x1280", + "1280x720" + ] + }, + "azure/sora-2-pro-high-res": { + "litellm_provider": "azure", + "mode": "video_generation", + "output_cost_per_video_per_second": 0.5, + "source": "https://azure.microsoft.com/en-us/products/ai-services/video-generation", + "supported_modalities": [ + "text" + ], + "supported_output_modalities": [ + "video" + ], + "supported_resolutions": [ + "1024x1792", + "1792x1024" + ] + }, + "runwayml/gen4_turbo": { + "litellm_provider": "runwayml", + "mode": "video_generation", + "output_cost_per_video_per_second": 0.05, + "source": "https://docs.dev.runwayml.com/guides/pricing/", + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "video" + ], + "supported_resolutions": [ + "1280x720", + "720x1280" + ], + "metadata": { + "comment": "5 credits per second @ $0.01 per credit = $0.05 per second" + } + }, + "runwayml/gen4_aleph": { + "litellm_provider": "runwayml", + "mode": "video_generation", + "output_cost_per_video_per_second": 0.15, + "source": "https://docs.dev.runwayml.com/guides/pricing/", + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "video" + ], + 
"supported_resolutions": [ + "1280x720", + "720x1280" + ], + "metadata": { + "comment": "15 credits per second @ $0.01 per credit = $0.15 per second" + } + }, + "runwayml/gen3a_turbo": { + "litellm_provider": "runwayml", + "mode": "video_generation", + "output_cost_per_video_per_second": 0.05, + "source": "https://docs.dev.runwayml.com/guides/pricing/", + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "video" + ], + "supported_resolutions": [ + "1280x720", + "720x1280" + ], + "metadata": { + "comment": "5 credits per second @ $0.01 per credit = $0.05 per second" + } + }, + "runwayml/gen4_image": { + "litellm_provider": "runwayml", + "mode": "image_generation", + "input_cost_per_image": 0.05, + "output_cost_per_image": 0.05, + "source": "https://docs.dev.runwayml.com/guides/pricing/", + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "image" + ], + "supported_resolutions": [ + "1280x720", + "1920x1080" + ], + "metadata": { + "comment": "5 credits per 720p image or 8 credits per 1080p image @ $0.01 per credit. Using 5 credits ($0.05) as base cost" + } + }, + "runwayml/gen4_image_turbo": { + "litellm_provider": "runwayml", + "mode": "image_generation", + "input_cost_per_image": 0.02, + "output_cost_per_image": 0.02, + "source": "https://docs.dev.runwayml.com/guides/pricing/", + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "image" + ], + "supported_resolutions": [ + "1280x720", + "1920x1080" + ], + "metadata": { + "comment": "2 credits per image (any resolution) @ $0.01 per credit = $0.02 per image" + } + }, + "runwayml/eleven_multilingual_v2": { + "litellm_provider": "runwayml", + "mode": "audio_speech", + "input_cost_per_character": 3e-07, + "source": "https://docs.dev.runwayml.com/guides/pricing/", + "metadata": { + "comment": "Estimated cost based on standard TTS pricing. RunwayML uses ElevenLabs models." 
+ } + }, + "fireworks_ai/accounts/fireworks/models/qwen3-coder-480b-a35b-instruct": { + "max_tokens": 262144, + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "input_cost_per_token": 4.5e-07, + "output_cost_per_token": 1.8e-06, + "litellm_provider": "fireworks_ai", + "mode": "chat", + "supports_reasoning": true + }, + "fireworks_ai/accounts/fireworks/models/flux-kontext-pro": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 4e-08, + "output_cost_per_token": 4e-08, + "litellm_provider": "fireworks_ai", + "mode": "image_generation" + }, + "fireworks_ai/accounts/fireworks/models/SSD-1B": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 1.3e-10, + "output_cost_per_token": 1.3e-10, + "litellm_provider": "fireworks_ai", + "mode": "image_generation" + }, + "fireworks_ai/accounts/fireworks/models/chronos-hermes-13b-v2": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 2e-07, + "output_cost_per_token": 2e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/code-llama-13b": { + "max_tokens": 16384, + "max_input_tokens": 16384, + "max_output_tokens": 16384, + "input_cost_per_token": 2e-07, + "output_cost_per_token": 2e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/code-llama-13b-instruct": { + "max_tokens": 16384, + "max_input_tokens": 16384, + "max_output_tokens": 16384, + "input_cost_per_token": 2e-07, + "output_cost_per_token": 2e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/code-llama-13b-python": { + "max_tokens": 16384, + "max_input_tokens": 16384, + "max_output_tokens": 16384, + "input_cost_per_token": 2e-07, + "output_cost_per_token": 2e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + 
"fireworks_ai/accounts/fireworks/models/code-llama-34b": { + "max_tokens": 16384, + "max_input_tokens": 16384, + "max_output_tokens": 16384, + "input_cost_per_token": 9e-07, + "output_cost_per_token": 9e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/code-llama-34b-instruct": { + "max_tokens": 16384, + "max_input_tokens": 16384, + "max_output_tokens": 16384, + "input_cost_per_token": 9e-07, + "output_cost_per_token": 9e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/code-llama-34b-python": { + "max_tokens": 16384, + "max_input_tokens": 16384, + "max_output_tokens": 16384, + "input_cost_per_token": 9e-07, + "output_cost_per_token": 9e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/code-llama-70b": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 9e-07, + "output_cost_per_token": 9e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/code-llama-70b-instruct": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 9e-07, + "output_cost_per_token": 9e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/code-llama-70b-python": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 9e-07, + "output_cost_per_token": 9e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/code-llama-7b": { + "max_tokens": 16384, + "max_input_tokens": 16384, + "max_output_tokens": 16384, + "input_cost_per_token": 2e-07, + "output_cost_per_token": 2e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/code-llama-7b-instruct": { + "max_tokens": 
16384, + "max_input_tokens": 16384, + "max_output_tokens": 16384, + "input_cost_per_token": 2e-07, + "output_cost_per_token": 2e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/code-llama-7b-python": { + "max_tokens": 16384, + "max_input_tokens": 16384, + "max_output_tokens": 16384, + "input_cost_per_token": 2e-07, + "output_cost_per_token": 2e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/code-qwen-1p5-7b": { + "max_tokens": 65536, + "max_input_tokens": 65536, + "max_output_tokens": 65536, + "input_cost_per_token": 2e-07, + "output_cost_per_token": 2e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/codegemma-2b": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 1e-07, + "output_cost_per_token": 1e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/codegemma-7b": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 2e-07, + "output_cost_per_token": 2e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/cogito-671b-v2-p1": { + "max_tokens": 163840, + "max_input_tokens": 163840, + "max_output_tokens": 163840, + "input_cost_per_token": 1.2e-06, + "output_cost_per_token": 1.2e-06, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/cogito-v1-preview-llama-3b": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 1e-07, + "output_cost_per_token": 1e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/cogito-v1-preview-llama-70b": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + 
"input_cost_per_token": 9e-07, + "output_cost_per_token": 9e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/cogito-v1-preview-llama-8b": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 2e-07, + "output_cost_per_token": 2e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/cogito-v1-preview-qwen-14b": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 2e-07, + "output_cost_per_token": 2e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/cogito-v1-preview-qwen-32b": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 9e-07, + "output_cost_per_token": 9e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/flux-kontext-max": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 8e-08, + "output_cost_per_token": 8e-08, + "litellm_provider": "fireworks_ai", + "mode": "image_generation" + }, + "fireworks_ai/accounts/fireworks/models/dbrx-instruct": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "input_cost_per_token": 1.2e-06, + "output_cost_per_token": 1.2e-06, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/deepseek-coder-1b-base": { + "max_tokens": 16384, + "max_input_tokens": 16384, + "max_output_tokens": 16384, + "input_cost_per_token": 1e-07, + "output_cost_per_token": 1e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/deepseek-coder-33b-instruct": { + "max_tokens": 16384, + "max_input_tokens": 16384, + "max_output_tokens": 16384, + "input_cost_per_token": 
9e-07, + "output_cost_per_token": 9e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/deepseek-coder-7b-base": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 2e-07, + "output_cost_per_token": 2e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/deepseek-coder-7b-base-v1p5": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 2e-07, + "output_cost_per_token": 2e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/deepseek-coder-7b-instruct-v1p5": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 2e-07, + "output_cost_per_token": 2e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/deepseek-coder-v2-lite-base": { + "max_tokens": 163840, + "max_input_tokens": 163840, + "max_output_tokens": 163840, + "input_cost_per_token": 5e-07, + "output_cost_per_token": 5e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/deepseek-coder-v2-lite-instruct": { + "max_tokens": 163840, + "max_input_tokens": 163840, + "max_output_tokens": 163840, + "input_cost_per_token": 5e-07, + "output_cost_per_token": 5e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/deepseek-prover-v2": { + "max_tokens": 163840, + "max_input_tokens": 163840, + "max_output_tokens": 163840, + "input_cost_per_token": 1.2e-06, + "output_cost_per_token": 1.2e-06, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/deepseek-r1-0528-distill-qwen3-8b": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 2e-07, + 
"output_cost_per_token": 2e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/deepseek-r1-distill-llama-70b": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 9e-07, + "output_cost_per_token": 9e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/deepseek-r1-distill-llama-8b": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 2e-07, + "output_cost_per_token": 2e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/deepseek-r1-distill-qwen-14b": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 2e-07, + "output_cost_per_token": 2e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/deepseek-r1-distill-qwen-1p5b": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 1e-07, + "output_cost_per_token": 1e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/deepseek-r1-distill-qwen-32b": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 9e-07, + "output_cost_per_token": 9e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/deepseek-r1-distill-qwen-7b": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 2e-07, + "output_cost_per_token": 2e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/deepseek-v2-lite-chat": { + "max_tokens": 163840, + "max_input_tokens": 163840, + "max_output_tokens": 163840, + "input_cost_per_token": 
5e-07, + "output_cost_per_token": 5e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/deepseek-v2p5": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "input_cost_per_token": 1.2e-06, + "output_cost_per_token": 1.2e-06, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/devstral-small-2505": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 9e-07, + "output_cost_per_token": 9e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/dobby-mini-unhinged-plus-llama-3-1-8b": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 2e-07, + "output_cost_per_token": 2e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/dobby-unhinged-llama-3-3-70b-new": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 9e-07, + "output_cost_per_token": 9e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/dolphin-2-9-2-qwen2-72b": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 9e-07, + "output_cost_per_token": 9e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/dolphin-2p6-mixtral-8x7b": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "input_cost_per_token": 5e-07, + "output_cost_per_token": 5e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/ernie-4p5-21b-a3b-pt": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 1e-07, + 
"output_cost_per_token": 1e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/ernie-4p5-300b-a47b-pt": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 1e-07, + "output_cost_per_token": 1e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/fare-20b": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 9e-07, + "output_cost_per_token": 9e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/firefunction-v1": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "input_cost_per_token": 5e-07, + "output_cost_per_token": 5e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/firellava-13b": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 2e-07, + "output_cost_per_token": 2e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/firesearch-ocr-v6": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 2e-07, + "output_cost_per_token": 2e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/fireworks-asr-large": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 0.0, + "output_cost_per_token": 0.0, + "litellm_provider": "fireworks_ai", + "mode": "audio_transcription" + }, + "fireworks_ai/accounts/fireworks/models/fireworks-asr-v2": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 0.0, + "output_cost_per_token": 0.0, + "litellm_provider": "fireworks_ai", + "mode": "audio_transcription" 
+ }, + "fireworks_ai/accounts/fireworks/models/flux-1-dev": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 1e-07, + "output_cost_per_token": 1e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/flux-1-dev-controlnet-union": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 1e-09, + "output_cost_per_token": 1e-09, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/flux-1-dev-fp8": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 5e-10, + "output_cost_per_token": 5e-10, + "litellm_provider": "fireworks_ai", + "mode": "image_generation" + }, + "fireworks_ai/accounts/fireworks/models/flux-1-schnell": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 1e-07, + "output_cost_per_token": 1e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/flux-1-schnell-fp8": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 3.5e-10, + "output_cost_per_token": 3.5e-10, + "litellm_provider": "fireworks_ai", + "mode": "image_generation" + }, + "fireworks_ai/accounts/fireworks/models/gemma-2b-it": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 1e-07, + "output_cost_per_token": 1e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/gemma-3-27b-it": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 9e-07, + "output_cost_per_token": 9e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/gemma-7b": { + "max_tokens": 8192, + 
"max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 2e-07, + "output_cost_per_token": 2e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/gemma-7b-it": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 2e-07, + "output_cost_per_token": 2e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/gemma2-9b-it": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 2e-07, + "output_cost_per_token": 2e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/glm-4p5v": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 1.2e-06, + "output_cost_per_token": 1.2e-06, + "litellm_provider": "fireworks_ai", + "mode": "chat", + "supports_reasoning": true + }, + "fireworks_ai/accounts/fireworks/models/gpt-oss-safeguard-120b": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 1.2e-06, + "output_cost_per_token": 1.2e-06, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/gpt-oss-safeguard-20b": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 5e-07, + "output_cost_per_token": 5e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/hermes-2-pro-mistral-7b": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "input_cost_per_token": 2e-07, + "output_cost_per_token": 2e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/internvl3-38b": { + "max_tokens": 16384, + "max_input_tokens": 16384, + "max_output_tokens": 16384, + 
"input_cost_per_token": 9e-07, + "output_cost_per_token": 9e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/internvl3-78b": { + "max_tokens": 16384, + "max_input_tokens": 16384, + "max_output_tokens": 16384, + "input_cost_per_token": 9e-07, + "output_cost_per_token": 9e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/internvl3-8b": { + "max_tokens": 16384, + "max_input_tokens": 16384, + "max_output_tokens": 16384, + "input_cost_per_token": 2e-07, + "output_cost_per_token": 2e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/japanese-stable-diffusion-xl": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 1.3e-10, + "output_cost_per_token": 1.3e-10, + "litellm_provider": "fireworks_ai", + "mode": "image_generation" + }, + "fireworks_ai/accounts/fireworks/models/kat-coder": { + "max_tokens": 262144, + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "input_cost_per_token": 9e-07, + "output_cost_per_token": 9e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/kat-dev-32b": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 9e-07, + "output_cost_per_token": 9e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/kat-dev-72b-exp": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 9e-07, + "output_cost_per_token": 9e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/llama-guard-2-8b": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 2e-07, + "output_cost_per_token": 2e-07, + 
"litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/llama-guard-3-1b": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 1e-07, + "output_cost_per_token": 1e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/llama-guard-3-8b": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 2e-07, + "output_cost_per_token": 2e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/llama-v2-13b": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 2e-07, + "output_cost_per_token": 2e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/llama-v2-13b-chat": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 2e-07, + "output_cost_per_token": 2e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/llama-v2-70b": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 1e-07, + "output_cost_per_token": 1e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/llama-v2-70b-chat": { + "max_tokens": 2048, + "max_input_tokens": 2048, + "max_output_tokens": 2048, + "input_cost_per_token": 9e-07, + "output_cost_per_token": 9e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/llama-v2-7b": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 2e-07, + "output_cost_per_token": 2e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + 
"fireworks_ai/accounts/fireworks/models/llama-v2-7b-chat": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 2e-07, + "output_cost_per_token": 2e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/llama-v3-70b-instruct": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 9e-07, + "output_cost_per_token": 9e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/llama-v3-70b-instruct-hf": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 9e-07, + "output_cost_per_token": 9e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/llama-v3-8b": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 2e-07, + "output_cost_per_token": 2e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/llama-v3-8b-instruct-hf": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 2e-07, + "output_cost_per_token": 2e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/llama-v3p1-405b-instruct-long": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 1e-07, + "output_cost_per_token": 1e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/llama-v3p1-70b-instruct": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 9e-07, + "output_cost_per_token": 9e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/llama-v3p1-70b-instruct-1b": { + 
"max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 1e-07, + "output_cost_per_token": 1e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/llama-v3p1-nemotron-70b-instruct": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 9e-07, + "output_cost_per_token": 9e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/llama-v3p2-1b": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 1e-07, + "output_cost_per_token": 1e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/llama-v3p2-3b": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 1e-07, + "output_cost_per_token": 1e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/llama-v3p3-70b-instruct": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 9e-07, + "output_cost_per_token": 9e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/llamaguard-7b": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 2e-07, + "output_cost_per_token": 2e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/llava-yi-34b": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 9e-07, + "output_cost_per_token": 9e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/minimax-m1-80k": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + 
"input_cost_per_token": 1e-07, + "output_cost_per_token": 1e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/minimax-m2": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 3e-07, + "output_cost_per_token": 1.2e-06, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/ministral-3-14b-instruct-2512": { + "max_tokens": 256000, + "max_input_tokens": 256000, + "max_output_tokens": 256000, + "input_cost_per_token": 2e-07, + "output_cost_per_token": 2e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/ministral-3-3b-instruct-2512": { + "max_tokens": 256000, + "max_input_tokens": 256000, + "max_output_tokens": 256000, + "input_cost_per_token": 1e-07, + "output_cost_per_token": 1e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/ministral-3-8b-instruct-2512": { + "max_tokens": 256000, + "max_input_tokens": 256000, + "max_output_tokens": 256000, + "input_cost_per_token": 2e-07, + "output_cost_per_token": 2e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/mistral-7b": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "input_cost_per_token": 2e-07, + "output_cost_per_token": 2e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/mistral-7b-instruct-4k": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "input_cost_per_token": 2e-07, + "output_cost_per_token": 2e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/mistral-7b-instruct-v0p2": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "input_cost_per_token": 2e-07, + 
"output_cost_per_token": 2e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/mistral-7b-instruct-v3": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "input_cost_per_token": 2e-07, + "output_cost_per_token": 2e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/mistral-7b-v0p2": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "input_cost_per_token": 2e-07, + "output_cost_per_token": 2e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/mistral-large-3-fp8": { + "max_tokens": 256000, + "max_input_tokens": 256000, + "max_output_tokens": 256000, + "input_cost_per_token": 1.2e-06, + "output_cost_per_token": 1.2e-06, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/mistral-nemo-base-2407": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "input_cost_per_token": 2e-07, + "output_cost_per_token": 2e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/mistral-nemo-instruct-2407": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "input_cost_per_token": 2e-07, + "output_cost_per_token": 2e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/mistral-small-24b-instruct-2501": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "input_cost_per_token": 9e-07, + "output_cost_per_token": 9e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/mixtral-8x22b": { + "max_tokens": 65536, + "max_input_tokens": 65536, + "max_output_tokens": 65536, + "input_cost_per_token": 1.2e-06, + "output_cost_per_token": 1.2e-06, + 
"litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/mixtral-8x22b-instruct": { + "max_tokens": 65536, + "max_input_tokens": 65536, + "max_output_tokens": 65536, + "input_cost_per_token": 1.2e-06, + "output_cost_per_token": 1.2e-06, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/mixtral-8x7b": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "input_cost_per_token": 5e-07, + "output_cost_per_token": 5e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/mixtral-8x7b-instruct": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "input_cost_per_token": 5e-07, + "output_cost_per_token": 5e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/mixtral-8x7b-instruct-hf": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "input_cost_per_token": 5e-07, + "output_cost_per_token": 5e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/mythomax-l2-13b": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 2e-07, + "output_cost_per_token": 2e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/nemotron-nano-v2-12b-vl": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 1e-07, + "output_cost_per_token": 1e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/nous-capybara-7b-v1p9": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "input_cost_per_token": 2e-07, + "output_cost_per_token": 2e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + 
"fireworks_ai/accounts/fireworks/models/nous-hermes-2-mixtral-8x7b-dpo": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "input_cost_per_token": 5e-07, + "output_cost_per_token": 5e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/nous-hermes-2-yi-34b": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 9e-07, + "output_cost_per_token": 9e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/nous-hermes-llama2-13b": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 2e-07, + "output_cost_per_token": 2e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/nous-hermes-llama2-70b": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 9e-07, + "output_cost_per_token": 9e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/nous-hermes-llama2-7b": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 2e-07, + "output_cost_per_token": 2e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/nvidia-nemotron-nano-12b-v2": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 2e-07, + "output_cost_per_token": 2e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/nvidia-nemotron-nano-9b-v2": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 2e-07, + "output_cost_per_token": 2e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + 
"fireworks_ai/accounts/fireworks/models/openchat-3p5-0106-7b": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 2e-07, + "output_cost_per_token": 2e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/openhermes-2-mistral-7b": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "input_cost_per_token": 2e-07, + "output_cost_per_token": 2e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/openhermes-2p5-mistral-7b": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "input_cost_per_token": 2e-07, + "output_cost_per_token": 2e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/openorca-7b": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "input_cost_per_token": 2e-07, + "output_cost_per_token": 2e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/phi-2-3b": { + "max_tokens": 2048, + "max_input_tokens": 2048, + "max_output_tokens": 2048, + "input_cost_per_token": 1e-07, + "output_cost_per_token": 1e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/phi-3-mini-128k-instruct": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 1e-07, + "output_cost_per_token": 1e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/phi-3-vision-128k-instruct": { + "max_tokens": 32064, + "max_input_tokens": 32064, + "max_output_tokens": 32064, + "input_cost_per_token": 2e-07, + "output_cost_per_token": 2e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/phind-code-llama-34b-python-v1": 
{ + "max_tokens": 16384, + "max_input_tokens": 16384, + "max_output_tokens": 16384, + "input_cost_per_token": 9e-07, + "output_cost_per_token": 9e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/phind-code-llama-34b-v1": { + "max_tokens": 16384, + "max_input_tokens": 16384, + "max_output_tokens": 16384, + "input_cost_per_token": 9e-07, + "output_cost_per_token": 9e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/phind-code-llama-34b-v2": { + "max_tokens": 16384, + "max_input_tokens": 16384, + "max_output_tokens": 16384, + "input_cost_per_token": 9e-07, + "output_cost_per_token": 9e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/playground-v2-1024px-aesthetic": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 1.3e-10, + "output_cost_per_token": 1.3e-10, + "litellm_provider": "fireworks_ai", + "mode": "image_generation" + }, + "fireworks_ai/accounts/fireworks/models/playground-v2-5-1024px-aesthetic": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 1.3e-10, + "output_cost_per_token": 1.3e-10, + "litellm_provider": "fireworks_ai", + "mode": "image_generation" + }, + "fireworks_ai/accounts/fireworks/models/pythia-12b": { + "max_tokens": 2048, + "max_input_tokens": 2048, + "max_output_tokens": 2048, + "input_cost_per_token": 2e-07, + "output_cost_per_token": 2e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/qwen-qwq-32b-preview": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "input_cost_per_token": 9e-07, + "output_cost_per_token": 9e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/qwen-v2p5-14b-instruct": { + "max_tokens": 32768, 
+ "max_input_tokens": 32768, + "max_output_tokens": 32768, + "input_cost_per_token": 2e-07, + "output_cost_per_token": 2e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/qwen-v2p5-7b": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 2e-07, + "output_cost_per_token": 2e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/qwen1p5-72b-chat": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "input_cost_per_token": 9e-07, + "output_cost_per_token": 9e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/qwen2-7b-instruct": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "input_cost_per_token": 2e-07, + "output_cost_per_token": 2e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/qwen2-vl-2b-instruct": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "input_cost_per_token": 1e-07, + "output_cost_per_token": 1e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/qwen2-vl-72b-instruct": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "input_cost_per_token": 9e-07, + "output_cost_per_token": 9e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/qwen2-vl-7b-instruct": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "input_cost_per_token": 2e-07, + "output_cost_per_token": 2e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/qwen2p5-0p5b-instruct": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + 
"input_cost_per_token": 1e-07, + "output_cost_per_token": 1e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/qwen2p5-14b": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 2e-07, + "output_cost_per_token": 2e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/qwen2p5-1p5b-instruct": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "input_cost_per_token": 1e-07, + "output_cost_per_token": 1e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/qwen2p5-32b": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 9e-07, + "output_cost_per_token": 9e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/qwen2p5-32b-instruct": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "input_cost_per_token": 9e-07, + "output_cost_per_token": 9e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/qwen2p5-72b": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 9e-07, + "output_cost_per_token": 9e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/qwen2p5-72b-instruct": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "input_cost_per_token": 9e-07, + "output_cost_per_token": 9e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/qwen2p5-7b-instruct": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "input_cost_per_token": 2e-07, + "output_cost_per_token": 2e-07, + 
"litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/qwen2p5-coder-0p5b": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "input_cost_per_token": 1e-07, + "output_cost_per_token": 1e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/qwen2p5-coder-0p5b-instruct": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "input_cost_per_token": 1e-07, + "output_cost_per_token": 1e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/qwen2p5-coder-14b": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "input_cost_per_token": 2e-07, + "output_cost_per_token": 2e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/qwen2p5-coder-14b-instruct": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "input_cost_per_token": 2e-07, + "output_cost_per_token": 2e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/qwen2p5-coder-1p5b": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "input_cost_per_token": 1e-07, + "output_cost_per_token": 1e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/qwen2p5-coder-1p5b-instruct": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "input_cost_per_token": 1e-07, + "output_cost_per_token": 1e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/qwen2p5-coder-32b": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "input_cost_per_token": 9e-07, + "output_cost_per_token": 9e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, 
+ "fireworks_ai/accounts/fireworks/models/qwen2p5-coder-32b-instruct-128k": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 9e-07, + "output_cost_per_token": 9e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/qwen2p5-coder-32b-instruct-32k-rope": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "input_cost_per_token": 9e-07, + "output_cost_per_token": 9e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/qwen2p5-coder-32b-instruct-64k": { + "max_tokens": 65536, + "max_input_tokens": 65536, + "max_output_tokens": 65536, + "input_cost_per_token": 9e-07, + "output_cost_per_token": 9e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/qwen2p5-coder-3b": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "input_cost_per_token": 1e-07, + "output_cost_per_token": 1e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/qwen2p5-coder-3b-instruct": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "input_cost_per_token": 1e-07, + "output_cost_per_token": 1e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/qwen2p5-coder-7b": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "input_cost_per_token": 2e-07, + "output_cost_per_token": 2e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/qwen2p5-coder-7b-instruct": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "input_cost_per_token": 2e-07, + "output_cost_per_token": 2e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + 
"fireworks_ai/accounts/fireworks/models/qwen2p5-math-72b-instruct": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 9e-07, + "output_cost_per_token": 9e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/qwen2p5-vl-32b-instruct": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "input_cost_per_token": 9e-07, + "output_cost_per_token": 9e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/qwen2p5-vl-3b-instruct": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "input_cost_per_token": 2e-07, + "output_cost_per_token": 2e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/qwen2p5-vl-72b-instruct": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "input_cost_per_token": 9e-07, + "output_cost_per_token": 9e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/qwen2p5-vl-7b-instruct": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "input_cost_per_token": 2e-07, + "output_cost_per_token": 2e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/qwen3-0p6b": { + "max_tokens": 40960, + "max_input_tokens": 40960, + "max_output_tokens": 40960, + "input_cost_per_token": 1e-07, + "output_cost_per_token": 1e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/qwen3-14b": { + "max_tokens": 40960, + "max_input_tokens": 40960, + "max_output_tokens": 40960, + "input_cost_per_token": 2e-07, + "output_cost_per_token": 2e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/qwen3-1p7b": { + 
"max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 1e-07, + "output_cost_per_token": 1e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/qwen3-1p7b-fp8-draft": { + "max_tokens": 262144, + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "input_cost_per_token": 1e-07, + "output_cost_per_token": 1e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/qwen3-1p7b-fp8-draft-131072": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 1e-07, + "output_cost_per_token": 1e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/qwen3-1p7b-fp8-draft-40960": { + "max_tokens": 40960, + "max_input_tokens": 40960, + "max_output_tokens": 40960, + "input_cost_per_token": 1e-07, + "output_cost_per_token": 1e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/qwen3-235b-a22b": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 2.2e-07, + "output_cost_per_token": 8.8e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/qwen3-235b-a22b-instruct-2507": { + "max_tokens": 262144, + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "input_cost_per_token": 2.2e-07, + "output_cost_per_token": 8.8e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/qwen3-235b-a22b-thinking-2507": { + "max_tokens": 262144, + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "input_cost_per_token": 2.2e-07, + "output_cost_per_token": 8.8e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/qwen3-30b-a3b": { + "max_tokens": 
131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 1.5e-07, + "output_cost_per_token": 6e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/qwen3-30b-a3b-instruct-2507": { + "max_tokens": 262144, + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "input_cost_per_token": 5e-07, + "output_cost_per_token": 5e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/qwen3-30b-a3b-thinking-2507": { + "max_tokens": 262144, + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "input_cost_per_token": 9e-07, + "output_cost_per_token": 9e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/qwen3-32b": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 9e-07, + "output_cost_per_token": 9e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat", + "supports_reasoning": true + }, + "fireworks_ai/accounts/fireworks/models/qwen3-4b": { + "max_tokens": 40960, + "max_input_tokens": 40960, + "max_output_tokens": 40960, + "input_cost_per_token": 2e-07, + "output_cost_per_token": 2e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/qwen3-4b-instruct-2507": { + "max_tokens": 262144, + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "input_cost_per_token": 2e-07, + "output_cost_per_token": 2e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/qwen3-8b": { + "max_tokens": 40960, + "max_input_tokens": 40960, + "max_output_tokens": 40960, + "input_cost_per_token": 2e-07, + "output_cost_per_token": 2e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat", + "supports_reasoning": true + }, + "fireworks_ai/accounts/fireworks/models/qwen3-coder-30b-a3b-instruct": { + "max_tokens": 
262144, + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "input_cost_per_token": 1.5e-07, + "output_cost_per_token": 6e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/qwen3-coder-480b-instruct-bf16": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 9e-07, + "output_cost_per_token": 9e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/qwen3-embedding-0p6b": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "input_cost_per_token": 0.0, + "output_cost_per_token": 0.0, + "litellm_provider": "fireworks_ai", + "mode": "embedding" + }, + "fireworks_ai/accounts/fireworks/models/qwen3-embedding-4b": { + "max_tokens": 40960, + "max_input_tokens": 40960, + "max_output_tokens": 40960, + "input_cost_per_token": 0.0, + "output_cost_per_token": 0.0, + "litellm_provider": "fireworks_ai", + "mode": "embedding" + }, + "fireworks_ai/accounts/fireworks/models/": { + "max_tokens": 40960, + "max_input_tokens": 40960, + "max_output_tokens": 40960, + "input_cost_per_token": 1e-07, + "output_cost_per_token": 0.0, + "litellm_provider": "fireworks_ai", + "mode": "embedding" + }, + "fireworks_ai/accounts/fireworks/models/qwen3-next-80b-a3b-instruct": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 9e-07, + "output_cost_per_token": 9e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/qwen3-next-80b-a3b-thinking": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 9e-07, + "output_cost_per_token": 9e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/qwen3-reranker-0p6b": { + "max_tokens": 40960, + "max_input_tokens": 40960, + "max_output_tokens": 40960, 
+ "input_cost_per_token": 0.0, + "output_cost_per_token": 0.0, + "litellm_provider": "fireworks_ai", + "mode": "rerank" + }, + "fireworks_ai/accounts/fireworks/models/qwen3-reranker-4b": { + "max_tokens": 40960, + "max_input_tokens": 40960, + "max_output_tokens": 40960, + "input_cost_per_token": 0.0, + "output_cost_per_token": 0.0, + "litellm_provider": "fireworks_ai", + "mode": "rerank" + }, + "fireworks_ai/accounts/fireworks/models/qwen3-reranker-8b": { + "max_tokens": 40960, + "max_input_tokens": 40960, + "max_output_tokens": 40960, + "input_cost_per_token": 0.0, + "output_cost_per_token": 0.0, + "litellm_provider": "fireworks_ai", + "mode": "rerank" + }, + "fireworks_ai/accounts/fireworks/models/qwen3-vl-235b-a22b-instruct": { + "max_tokens": 262144, + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "input_cost_per_token": 2.2e-07, + "output_cost_per_token": 8.8e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/qwen3-vl-235b-a22b-thinking": { + "max_tokens": 262144, + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "input_cost_per_token": 2.2e-07, + "output_cost_per_token": 8.8e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/qwen3-vl-30b-a3b-instruct": { + "max_tokens": 262144, + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "input_cost_per_token": 1.5e-07, + "output_cost_per_token": 6e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/qwen3-vl-30b-a3b-thinking": { + "max_tokens": 262144, + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "input_cost_per_token": 1.5e-07, + "output_cost_per_token": 6e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/qwen3-vl-32b-instruct": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 9e-07, 
+ "output_cost_per_token": 9e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/qwen3-vl-8b-instruct": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 2e-07, + "output_cost_per_token": 2e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/qwq-32b": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 9e-07, + "output_cost_per_token": 9e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/rolm-ocr": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "input_cost_per_token": 2e-07, + "output_cost_per_token": 2e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/snorkel-mistral-7b-pairrm-dpo": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "input_cost_per_token": 2e-07, + "output_cost_per_token": 2e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/stable-diffusion-xl-1024-v1-0": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 1.3e-10, + "output_cost_per_token": 1.3e-10, + "litellm_provider": "fireworks_ai", + "mode": "image_generation" + }, + "fireworks_ai/accounts/fireworks/models/stablecode-3b": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 1e-07, + "output_cost_per_token": 1e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/starcoder-16b": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 2e-07, + "output_cost_per_token": 2e-07, + "litellm_provider": "fireworks_ai", + 
"mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/starcoder-7b": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 2e-07, + "output_cost_per_token": 2e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/starcoder2-15b": { + "max_tokens": 16384, + "max_input_tokens": 16384, + "max_output_tokens": 16384, + "input_cost_per_token": 2e-07, + "output_cost_per_token": 2e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/starcoder2-3b": { + "max_tokens": 16384, + "max_input_tokens": 16384, + "max_output_tokens": 16384, + "input_cost_per_token": 1e-07, + "output_cost_per_token": 1e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/starcoder2-7b": { + "max_tokens": 16384, + "max_input_tokens": 16384, + "max_output_tokens": 16384, + "input_cost_per_token": 2e-07, + "output_cost_per_token": 2e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/toppy-m-7b": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "input_cost_per_token": 2e-07, + "output_cost_per_token": 2e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/whisper-v3": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 0.0, + "output_cost_per_token": 0.0, + "litellm_provider": "fireworks_ai", + "mode": "audio_transcription" + }, + "fireworks_ai/accounts/fireworks/models/whisper-v3-turbo": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 0.0, + "output_cost_per_token": 0.0, + "litellm_provider": "fireworks_ai", + "mode": "audio_transcription" + }, + "fireworks_ai/accounts/fireworks/models/yi-34b": { + "max_tokens": 4096, + 
"max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 9e-07, + "output_cost_per_token": 9e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/yi-34b-200k-capybara": { + "max_tokens": 200000, + "max_input_tokens": 200000, + "max_output_tokens": 200000, + "input_cost_per_token": 9e-07, + "output_cost_per_token": 9e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/yi-34b-chat": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 9e-07, + "output_cost_per_token": 9e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/yi-6b": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 2e-07, + "output_cost_per_token": 2e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "fireworks_ai/accounts/fireworks/models/zephyr-7b-beta": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "input_cost_per_token": 2e-07, + "output_cost_per_token": 2e-07, + "litellm_provider": "fireworks_ai", + "mode": "chat" + }, + "novita/deepseek/deepseek-v3.2": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 2.69e-07, + "output_cost_per_token": 4e-07, + "max_input_tokens": 163840, + "max_output_tokens": 65536, + "max_tokens": 65536, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_system_messages": true, + "supports_response_schema": true, + "cache_read_input_token_cost": 1.345e-07, + "input_cost_per_token_cache_hit": 1.345e-07, + "supports_reasoning": true + }, + "novita/minimax/minimax-m2.1": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 3e-07, + "output_cost_per_token": 1.2e-06, + "max_input_tokens": 204800, + 
"max_output_tokens": 131072, + "max_tokens": 131072, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_system_messages": true, + "supports_response_schema": true, + "cache_read_input_token_cost": 3e-08, + "input_cost_per_token_cache_hit": 3e-08 + }, + "novita/zai-org/glm-4.7": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 6e-07, + "output_cost_per_token": 2.2e-06, + "max_input_tokens": 204800, + "max_output_tokens": 131072, + "max_tokens": 131072, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_system_messages": true, + "supports_response_schema": true, + "cache_read_input_token_cost": 1.1e-07, + "input_cost_per_token_cache_hit": 1.1e-07, + "supports_reasoning": true + }, + "novita/xiaomimimo/mimo-v2-flash": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 1e-07, + "output_cost_per_token": 3e-07, + "max_input_tokens": 262144, + "max_output_tokens": 32000, + "max_tokens": 32000, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_system_messages": true, + "supports_response_schema": true, + "cache_read_input_token_cost": 2e-08, + "input_cost_per_token_cache_hit": 2e-08, + "supports_reasoning": true + }, + "novita/zai-org/autoglm-phone-9b-multilingual": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 3.5e-08, + "output_cost_per_token": 1.38e-07, + "max_input_tokens": 65536, + "max_output_tokens": 65536, + "max_tokens": 65536, + "supports_vision": true, + "supports_system_messages": true + }, + "novita/moonshotai/kimi-k2-thinking": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 6e-07, + "output_cost_per_token": 2.5e-06, + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "max_tokens": 262144, + 
"supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_system_messages": true, + "supports_response_schema": true, + "supports_reasoning": true + }, + "novita/minimax/minimax-m2": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 3e-07, + "output_cost_per_token": 1.2e-06, + "max_input_tokens": 204800, + "max_output_tokens": 131072, + "max_tokens": 131072, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_system_messages": true, + "cache_read_input_token_cost": 3e-08, + "input_cost_per_token_cache_hit": 3e-08, + "supports_reasoning": true + }, + "novita/paddlepaddle/paddleocr-vl": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 2e-08, + "output_cost_per_token": 2e-08, + "max_input_tokens": 16384, + "max_output_tokens": 16384, + "max_tokens": 16384, + "supports_vision": true, + "supports_system_messages": true + }, + "novita/deepseek/deepseek-v3.2-exp": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 2.7e-07, + "output_cost_per_token": 4.1e-07, + "max_input_tokens": 163840, + "max_output_tokens": 65536, + "max_tokens": 65536, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_system_messages": true, + "supports_response_schema": true, + "supports_reasoning": true + }, + "novita/qwen/qwen3-vl-235b-a22b-thinking": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 9.8e-07, + "output_cost_per_token": 3.95e-06, + "max_input_tokens": 131072, + "max_output_tokens": 32768, + "max_tokens": 32768, + "supports_vision": true, + "supports_system_messages": true, + "supports_reasoning": true + }, + "novita/zai-org/glm-4.6v": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 3e-07, + "output_cost_per_token": 9e-07, + 
"max_input_tokens": 131072, + "max_output_tokens": 32768, + "max_tokens": 32768, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_system_messages": true, + "supports_response_schema": true, + "cache_read_input_token_cost": 5.5e-08, + "input_cost_per_token_cache_hit": 5.5e-08, + "supports_reasoning": true + }, + "novita/zai-org/glm-4.6": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 5.5e-07, + "output_cost_per_token": 2.2e-06, + "max_input_tokens": 204800, + "max_output_tokens": 131072, + "max_tokens": 131072, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_system_messages": true, + "supports_response_schema": true, + "cache_read_input_token_cost": 1.1e-07, + "input_cost_per_token_cache_hit": 1.1e-07, + "supports_reasoning": true + }, + "novita/kwaipilot/kat-coder-pro": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 3e-07, + "output_cost_per_token": 1.2e-06, + "max_input_tokens": 256000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_system_messages": true, + "supports_response_schema": true, + "cache_read_input_token_cost": 6e-08, + "input_cost_per_token_cache_hit": 6e-08 + }, + "novita/qwen/qwen3-next-80b-a3b-instruct": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 1.5e-07, + "output_cost_per_token": 1.5e-06, + "max_input_tokens": 131072, + "max_output_tokens": 32768, + "max_tokens": 32768, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_system_messages": true, + "supports_response_schema": true + }, + "novita/qwen/qwen3-next-80b-a3b-thinking": { + "litellm_provider": "novita", + 
"mode": "chat", + "input_cost_per_token": 1.5e-07, + "output_cost_per_token": 1.5e-06, + "max_input_tokens": 131072, + "max_output_tokens": 32768, + "max_tokens": 32768, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_system_messages": true, + "supports_response_schema": true, + "supports_reasoning": true + }, + "novita/deepseek/deepseek-ocr": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 3e-08, + "output_cost_per_token": 3e-08, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "max_tokens": 8192, + "supports_tool_choice": true, + "supports_vision": true, + "supports_system_messages": true, + "supports_response_schema": true + }, + "novita/deepseek/deepseek-v3.1-terminus": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 2.7e-07, + "output_cost_per_token": 1e-06, + "max_input_tokens": 131072, + "max_output_tokens": 32768, + "max_tokens": 32768, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_system_messages": true, + "supports_response_schema": true, + "cache_read_input_token_cost": 1.35e-07, + "input_cost_per_token_cache_hit": 1.35e-07, + "supports_reasoning": true + }, + "novita/qwen/qwen3-vl-235b-a22b-instruct": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 3e-07, + "output_cost_per_token": 1.5e-06, + "max_input_tokens": 131072, + "max_output_tokens": 32768, + "max_tokens": 32768, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_system_messages": true, + "supports_response_schema": true + }, + "novita/qwen/qwen3-max": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 2.11e-06, + "output_cost_per_token": 8.45e-06, + "max_input_tokens": 262144, + "max_output_tokens": 65536, + 
"max_tokens": 65536, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_system_messages": true, + "supports_response_schema": true + }, + "novita/skywork/r1v4-lite": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 2e-07, + "output_cost_per_token": 6e-07, + "max_input_tokens": 262144, + "max_output_tokens": 65536, + "max_tokens": 65536, + "supports_tool_choice": true, + "supports_vision": true, + "supports_system_messages": true, + "supports_response_schema": true + }, + "novita/deepseek/deepseek-v3.1": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 2.7e-07, + "output_cost_per_token": 1e-06, + "max_input_tokens": 131072, + "max_output_tokens": 32768, + "max_tokens": 32768, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_system_messages": true, + "supports_response_schema": true, + "cache_read_input_token_cost": 1.35e-07, + "input_cost_per_token_cache_hit": 1.35e-07, + "supports_reasoning": true + }, + "novita/moonshotai/kimi-k2-0905": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 6e-07, + "output_cost_per_token": 2.5e-06, + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "max_tokens": 262144, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_system_messages": true, + "supports_response_schema": true + }, + "novita/qwen/qwen3-coder-480b-a35b-instruct": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 3e-07, + "output_cost_per_token": 1.3e-06, + "max_input_tokens": 262144, + "max_output_tokens": 65536, + "max_tokens": 65536, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_system_messages": true, + "supports_response_schema": true 
+ }, + "novita/qwen/qwen3-coder-30b-a3b-instruct": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 7e-08, + "output_cost_per_token": 2.7e-07, + "max_input_tokens": 160000, + "max_output_tokens": 32768, + "max_tokens": 32768, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_system_messages": true, + "supports_response_schema": true + }, + "novita/openai/gpt-oss-120b": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 5e-08, + "output_cost_per_token": 2.5e-07, + "max_input_tokens": 131072, + "max_output_tokens": 32768, + "max_tokens": 32768, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_system_messages": true, + "supports_response_schema": true, + "supports_reasoning": true + }, + "novita/moonshotai/kimi-k2-instruct": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 5.7e-07, + "output_cost_per_token": 2.3e-06, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_system_messages": true, + "supports_response_schema": true + }, + "novita/deepseek/deepseek-v3-0324": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 2.7e-07, + "output_cost_per_token": 1.12e-06, + "max_input_tokens": 163840, + "max_output_tokens": 163840, + "max_tokens": 163840, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_system_messages": true, + "supports_response_schema": true, + "cache_read_input_token_cost": 1.35e-07, + "input_cost_per_token_cache_hit": 1.35e-07 + }, + "novita/zai-org/glm-4.5": { + "litellm_provider": "novita", + "mode": "chat", + 
"input_cost_per_token": 6e-07, + "output_cost_per_token": 2.2e-06, + "max_input_tokens": 131072, + "max_output_tokens": 98304, + "max_tokens": 98304, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_system_messages": true, + "cache_read_input_token_cost": 1.1e-07, + "input_cost_per_token_cache_hit": 1.1e-07, + "supports_reasoning": true + }, + "novita/qwen/qwen3-235b-a22b-thinking-2507": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 3e-07, + "output_cost_per_token": 3e-06, + "max_input_tokens": 131072, + "max_output_tokens": 32768, + "max_tokens": 32768, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_system_messages": true, + "supports_reasoning": true + }, + "novita/meta-llama/llama-3.1-8b-instruct": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 2e-08, + "output_cost_per_token": 5e-08, + "max_input_tokens": 16384, + "max_output_tokens": 16384, + "max_tokens": 16384, + "supports_system_messages": true + }, + "novita/google/gemma-3-12b-it": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 5e-08, + "output_cost_per_token": 1e-07, + "max_input_tokens": 131072, + "max_output_tokens": 8192, + "max_tokens": 8192, + "supports_tool_choice": true, + "supports_vision": true, + "supports_system_messages": true, + "supports_response_schema": true + }, + "novita/zai-org/glm-4.5v": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 6e-07, + "output_cost_per_token": 1.8e-06, + "max_input_tokens": 65536, + "max_output_tokens": 16384, + "max_tokens": 16384, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_system_messages": true, + "supports_response_schema": true, + "cache_read_input_token_cost": 1.1e-07, 
+ "input_cost_per_token_cache_hit": 1.1e-07, + "supports_reasoning": true + }, + "novita/openai/gpt-oss-20b": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 4e-08, + "output_cost_per_token": 1.5e-07, + "max_input_tokens": 131072, + "max_output_tokens": 32768, + "max_tokens": 32768, + "supports_tool_choice": true, + "supports_vision": true, + "supports_system_messages": true, + "supports_response_schema": true, + "supports_reasoning": true + }, + "novita/qwen/qwen3-235b-a22b-instruct-2507": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 9e-08, + "output_cost_per_token": 5.8e-07, + "max_input_tokens": 131072, + "max_output_tokens": 16384, + "max_tokens": 16384, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_system_messages": true, + "supports_response_schema": true + }, + "novita/deepseek/deepseek-r1-distill-qwen-14b": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 1.5e-07, + "output_cost_per_token": 1.5e-07, + "max_input_tokens": 32768, + "max_output_tokens": 16384, + "max_tokens": 16384, + "supports_tool_choice": true, + "supports_system_messages": true, + "supports_response_schema": true, + "supports_reasoning": true + }, + "novita/meta-llama/llama-3.3-70b-instruct": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 1.35e-07, + "output_cost_per_token": 4e-07, + "max_input_tokens": 131072, + "max_output_tokens": 120000, + "max_tokens": 120000, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_system_messages": true + }, + "novita/qwen/qwen-2.5-72b-instruct": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 3.8e-07, + "output_cost_per_token": 4e-07, + "max_input_tokens": 32000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "supports_function_calling": true, 
+ "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_system_messages": true, + "supports_response_schema": true + }, + "novita/mistralai/mistral-nemo": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 4e-08, + "output_cost_per_token": 1.7e-07, + "max_input_tokens": 60288, + "max_output_tokens": 16000, + "max_tokens": 16000, + "supports_tool_choice": true, + "supports_system_messages": true, + "supports_response_schema": true + }, + "novita/minimaxai/minimax-m1-80k": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 5.5e-07, + "output_cost_per_token": 2.2e-06, + "max_input_tokens": 1000000, + "max_output_tokens": 40000, + "max_tokens": 40000, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_system_messages": true, + "supports_reasoning": true + }, + "novita/deepseek/deepseek-r1-0528": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 7e-07, + "output_cost_per_token": 2.5e-06, + "max_input_tokens": 163840, + "max_output_tokens": 32768, + "max_tokens": 32768, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_system_messages": true, + "supports_response_schema": true, + "cache_read_input_token_cost": 3.5e-07, + "input_cost_per_token_cache_hit": 3.5e-07, + "supports_reasoning": true + }, + "novita/deepseek/deepseek-r1-distill-qwen-32b": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 3e-07, + "output_cost_per_token": 3e-07, + "max_input_tokens": 64000, + "max_output_tokens": 32000, + "max_tokens": 32000, + "supports_tool_choice": true, + "supports_system_messages": true, + "supports_response_schema": true, + "supports_reasoning": true + }, + "novita/meta-llama/llama-3-8b-instruct": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 4e-08, + 
"output_cost_per_token": 4e-08, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "max_tokens": 8192, + "supports_system_messages": true + }, + "novita/microsoft/wizardlm-2-8x22b": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 6.2e-07, + "output_cost_per_token": 6.2e-07, + "max_input_tokens": 65535, + "max_output_tokens": 8000, + "max_tokens": 8000, + "supports_system_messages": true + }, + "novita/deepseek/deepseek-r1-0528-qwen3-8b": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 6e-08, + "output_cost_per_token": 9e-08, + "max_input_tokens": 128000, + "max_output_tokens": 32000, + "max_tokens": 32000, + "supports_system_messages": true, + "supports_reasoning": true + }, + "novita/deepseek/deepseek-r1-distill-llama-70b": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 8e-07, + "output_cost_per_token": 8e-07, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "max_tokens": 8192, + "supports_tool_choice": true, + "supports_system_messages": true, + "supports_response_schema": true, + "supports_reasoning": true + }, + "novita/meta-llama/llama-3-70b-instruct": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 5.1e-07, + "output_cost_per_token": 7.4e-07, + "max_input_tokens": 8192, + "max_output_tokens": 8000, + "max_tokens": 8000, + "supports_tool_choice": true, + "supports_system_messages": true, + "supports_response_schema": true + }, + "novita/qwen/qwen3-235b-a22b-fp8": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 2e-07, + "output_cost_per_token": 8e-07, + "max_input_tokens": 40960, + "max_output_tokens": 20000, + "max_tokens": 20000, + "supports_system_messages": true, + "supports_reasoning": true + }, + "novita/meta-llama/llama-4-maverick-17b-128e-instruct-fp8": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 2.7e-07, + "output_cost_per_token": 8.5e-07, + 
"max_input_tokens": 1048576, + "max_output_tokens": 8192, + "max_tokens": 8192, + "supports_vision": true, + "supports_system_messages": true + }, + "novita/meta-llama/llama-4-scout-17b-16e-instruct": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 1.8e-07, + "output_cost_per_token": 5.9e-07, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "supports_vision": true, + "supports_system_messages": true + }, + "novita/nousresearch/hermes-2-pro-llama-3-8b": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 1.4e-07, + "output_cost_per_token": 1.4e-07, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "max_tokens": 8192, + "supports_tool_choice": true, + "supports_system_messages": true, + "supports_response_schema": true + }, + "novita/qwen/qwen2.5-vl-72b-instruct": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 8e-07, + "output_cost_per_token": 8e-07, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "max_tokens": 32768, + "supports_vision": true, + "supports_system_messages": true + }, + "novita/sao10k/l3-70b-euryale-v2.1": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 1.48e-06, + "output_cost_per_token": 1.48e-06, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "max_tokens": 8192, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_system_messages": true + }, + "novita/baidu/ernie-4.5-21B-a3b-thinking": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 7e-08, + "output_cost_per_token": 2.8e-07, + "max_input_tokens": 131072, + "max_output_tokens": 65536, + "max_tokens": 65536, + "supports_system_messages": true, + "supports_reasoning": true + }, + "novita/sao10k/l3-8b-lunaris": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 5e-08, + 
"output_cost_per_token": 5e-08, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "max_tokens": 8192, + "supports_tool_choice": true, + "supports_system_messages": true, + "supports_response_schema": true + }, + "novita/baichuan/baichuan-m2-32b": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 7e-08, + "output_cost_per_token": 7e-08, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "supports_system_messages": true + }, + "novita/baidu/ernie-4.5-vl-424b-a47b": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 4.2e-07, + "output_cost_per_token": 1.25e-06, + "max_input_tokens": 123000, + "max_output_tokens": 16000, + "max_tokens": 16000, + "supports_vision": true, + "supports_system_messages": true, + "supports_reasoning": true + }, + "novita/baidu/ernie-4.5-300b-a47b-paddle": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 2.8e-07, + "output_cost_per_token": 1.1e-06, + "max_input_tokens": 123000, + "max_output_tokens": 12000, + "max_tokens": 12000, + "supports_tool_choice": true, + "supports_system_messages": true, + "supports_response_schema": true + }, + "novita/deepseek/deepseek-prover-v2-671b": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 7e-07, + "output_cost_per_token": 2.5e-06, + "max_input_tokens": 160000, + "max_output_tokens": 160000, + "max_tokens": 160000, + "supports_system_messages": true + }, + "novita/qwen/qwen3-32b-fp8": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 1e-07, + "output_cost_per_token": 4.5e-07, + "max_input_tokens": 40960, + "max_output_tokens": 20000, + "max_tokens": 20000, + "supports_system_messages": true, + "supports_reasoning": true + }, + "novita/qwen/qwen3-30b-a3b-fp8": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 9e-08, + "output_cost_per_token": 4.5e-07, + "max_input_tokens": 40960, + 
"max_output_tokens": 20000, + "max_tokens": 20000, + "supports_system_messages": true, + "supports_reasoning": true + }, + "novita/google/gemma-3-27b-it": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 1.19e-07, + "output_cost_per_token": 2e-07, + "max_input_tokens": 98304, + "max_output_tokens": 16384, + "max_tokens": 16384, + "supports_vision": true, + "supports_system_messages": true + }, + "novita/deepseek/deepseek-v3-turbo": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 4e-07, + "output_cost_per_token": 1.3e-06, + "max_input_tokens": 64000, + "max_output_tokens": 16000, + "max_tokens": 16000, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_system_messages": true + }, + "novita/deepseek/deepseek-r1-turbo": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 7e-07, + "output_cost_per_token": 2.5e-06, + "max_input_tokens": 64000, + "max_output_tokens": 16000, + "max_tokens": 16000, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_system_messages": true, + "supports_reasoning": true + }, + "novita/Sao10K/L3-8B-Stheno-v3.2": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 5e-08, + "output_cost_per_token": 5e-08, + "max_input_tokens": 8192, + "max_output_tokens": 32000, + "max_tokens": 32000, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_system_messages": true + }, + "novita/gryphe/mythomax-l2-13b": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 9e-08, + "output_cost_per_token": 9e-08, + "max_input_tokens": 4096, + "max_output_tokens": 3200, + "max_tokens": 3200, + "supports_system_messages": true + }, + "novita/baidu/ernie-4.5-vl-28b-a3b-thinking": { + "litellm_provider": "novita", + 
"mode": "chat", + "input_cost_per_token": 3.9e-07, + "output_cost_per_token": 3.9e-07, + "max_input_tokens": 131072, + "max_output_tokens": 65536, + "max_tokens": 65536, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_system_messages": true, + "supports_response_schema": true, + "supports_reasoning": true + }, + "novita/qwen/qwen3-vl-8b-instruct": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 8e-08, + "output_cost_per_token": 5e-07, + "max_input_tokens": 131072, + "max_output_tokens": 32768, + "max_tokens": 32768, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_system_messages": true, + "supports_response_schema": true + }, + "novita/zai-org/glm-4.5-air": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 1.3e-07, + "output_cost_per_token": 8.5e-07, + "max_input_tokens": 131072, + "max_output_tokens": 98304, + "max_tokens": 98304, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_system_messages": true, + "supports_reasoning": true + }, + "novita/qwen/qwen3-vl-30b-a3b-instruct": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 2e-07, + "output_cost_per_token": 7e-07, + "max_input_tokens": 131072, + "max_output_tokens": 32768, + "max_tokens": 32768, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_system_messages": true, + "supports_response_schema": true + }, + "novita/qwen/qwen3-vl-30b-a3b-thinking": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 2e-07, + "output_cost_per_token": 1e-06, + "max_input_tokens": 131072, + "max_output_tokens": 32768, + "max_tokens": 
32768, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_system_messages": true, + "supports_response_schema": true + }, + "novita/qwen/qwen3-omni-30b-a3b-thinking": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 2.5e-07, + "output_cost_per_token": 9.7e-07, + "max_input_tokens": 65536, + "max_output_tokens": 16384, + "max_tokens": 16384, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_system_messages": true, + "supports_response_schema": true, + "supports_reasoning": true, + "supports_audio_input": true + }, + "novita/qwen/qwen3-omni-30b-a3b-instruct": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 2.5e-07, + "output_cost_per_token": 9.7e-07, + "max_input_tokens": 65536, + "max_output_tokens": 16384, + "max_tokens": 16384, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_system_messages": true, + "supports_response_schema": true, + "supports_audio_input": true, + "supports_audio_output": true + }, + "novita/qwen/qwen-mt-plus": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 2.5e-07, + "output_cost_per_token": 7.5e-07, + "max_input_tokens": 16384, + "max_output_tokens": 8192, + "max_tokens": 8192, + "supports_system_messages": true + }, + "novita/baidu/ernie-4.5-vl-28b-a3b": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 1.4e-07, + "output_cost_per_token": 5.6e-07, + "max_input_tokens": 30000, + "max_output_tokens": 8000, + "max_tokens": 8000, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_system_messages": true, + 
"supports_reasoning": true + }, + "novita/baidu/ernie-4.5-21B-a3b": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 7e-08, + "output_cost_per_token": 2.8e-07, + "max_input_tokens": 120000, + "max_output_tokens": 8000, + "max_tokens": 8000, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_system_messages": true + }, + "novita/qwen/qwen3-8b-fp8": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 3.5e-08, + "output_cost_per_token": 1.38e-07, + "max_input_tokens": 128000, + "max_output_tokens": 20000, + "max_tokens": 20000, + "supports_system_messages": true, + "supports_reasoning": true + }, + "novita/qwen/qwen3-4b-fp8": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 3e-08, + "output_cost_per_token": 3e-08, + "max_input_tokens": 128000, + "max_output_tokens": 20000, + "max_tokens": 20000, + "supports_system_messages": true, + "supports_reasoning": true + }, + "novita/qwen/qwen2.5-7b-instruct": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 7e-08, + "output_cost_per_token": 7e-08, + "max_input_tokens": 32000, + "max_output_tokens": 32000, + "max_tokens": 32000, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_system_messages": true, + "supports_response_schema": true + }, + "novita/meta-llama/llama-3.2-3b-instruct": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 3e-08, + "output_cost_per_token": 5e-08, + "max_input_tokens": 32768, + "max_output_tokens": 32000, + "max_tokens": 32000, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_system_messages": true + }, + "novita/sao10k/l31-70b-euryale-v2.2": { + "litellm_provider": "novita", + "mode": "chat", + "input_cost_per_token": 1.48e-06, + 
"output_cost_per_token": 1.48e-06, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "max_tokens": 8192, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true, + "supports_system_messages": true + }, + "novita/qwen/qwen3-embedding-0.6b": { + "litellm_provider": "novita", + "mode": "embedding", + "input_cost_per_token": 7e-08, + "output_cost_per_token": 0, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "max_tokens": 32768 + }, + "novita/qwen/qwen3-embedding-8b": { + "litellm_provider": "novita", + "mode": "embedding", + "input_cost_per_token": 7e-08, + "output_cost_per_token": 0, + "max_input_tokens": 32768, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "novita/baai/bge-m3": { + "litellm_provider": "novita", + "mode": "embedding", + "input_cost_per_token": 1e-08, + "output_cost_per_token": 1e-08, + "max_input_tokens": 8192, + "max_output_tokens": 96000, + "max_tokens": 96000 + }, + "novita/qwen/qwen3-reranker-8b": { + "litellm_provider": "novita", + "mode": "rerank", + "input_cost_per_token": 5e-08, + "output_cost_per_token": 5e-08, + "max_input_tokens": 32768, + "max_output_tokens": 4096, + "max_tokens": 4096 + }, + "novita/baai/bge-reranker-v2-m3": { + "litellm_provider": "novita", + "mode": "rerank", + "input_cost_per_token": 1e-08, + "output_cost_per_token": 1e-08, + "max_input_tokens": 8000, + "max_output_tokens": 8000, + "max_tokens": 8000 + }, + "llamagate/llama-3.1-8b": { + "max_tokens": 8192, + "max_input_tokens": 131072, + "max_output_tokens": 8192, + "input_cost_per_token": 3e-08, + "output_cost_per_token": 5e-08, + "litellm_provider": "llamagate", + "mode": "chat", + "supports_function_calling": true, + "supports_response_schema": true + }, + "llamagate/llama-3.2-3b": { + "max_tokens": 8192, + "max_input_tokens": 131072, + "max_output_tokens": 8192, + "input_cost_per_token": 4e-08, + "output_cost_per_token": 8e-08, + "litellm_provider": "llamagate", + "mode": 
"chat", + "supports_function_calling": true, + "supports_response_schema": true + }, + "llamagate/mistral-7b-v0.3": { + "max_tokens": 8192, + "max_input_tokens": 32768, + "max_output_tokens": 8192, + "input_cost_per_token": 1e-07, + "output_cost_per_token": 1.5e-07, + "litellm_provider": "llamagate", + "mode": "chat", + "supports_function_calling": true, + "supports_response_schema": true + }, + "llamagate/qwen3-8b": { + "max_tokens": 8192, + "max_input_tokens": 32768, + "max_output_tokens": 8192, + "input_cost_per_token": 4e-08, + "output_cost_per_token": 1.4e-07, + "litellm_provider": "llamagate", + "mode": "chat", + "supports_function_calling": true, + "supports_response_schema": true + }, + "llamagate/dolphin3-8b": { + "max_tokens": 8192, + "max_input_tokens": 128000, + "max_output_tokens": 8192, + "input_cost_per_token": 8e-08, + "output_cost_per_token": 1.5e-07, + "litellm_provider": "llamagate", + "mode": "chat", + "supports_function_calling": true, + "supports_response_schema": true + }, + "llamagate/deepseek-r1-8b": { + "max_tokens": 16384, + "max_input_tokens": 65536, + "max_output_tokens": 16384, + "input_cost_per_token": 1e-07, + "output_cost_per_token": 2e-07, + "litellm_provider": "llamagate", + "mode": "chat", + "supports_function_calling": true, + "supports_response_schema": true, + "supports_reasoning": true + }, + "llamagate/deepseek-r1-7b-qwen": { + "max_tokens": 16384, + "max_input_tokens": 131072, + "max_output_tokens": 16384, + "input_cost_per_token": 8e-08, + "output_cost_per_token": 1.5e-07, + "litellm_provider": "llamagate", + "mode": "chat", + "supports_function_calling": true, + "supports_response_schema": true, + "supports_reasoning": true + }, + "llamagate/openthinker-7b": { + "max_tokens": 8192, + "max_input_tokens": 32768, + "max_output_tokens": 8192, + "input_cost_per_token": 8e-08, + "output_cost_per_token": 1.5e-07, + "litellm_provider": "llamagate", + "mode": "chat", + "supports_function_calling": true, + 
"supports_response_schema": true, + "supports_reasoning": true + }, + "llamagate/qwen2.5-coder-7b": { + "max_tokens": 8192, + "max_input_tokens": 32768, + "max_output_tokens": 8192, + "input_cost_per_token": 6e-08, + "output_cost_per_token": 1.2e-07, + "litellm_provider": "llamagate", + "mode": "chat", + "supports_function_calling": true, + "supports_response_schema": true + }, + "llamagate/deepseek-coder-6.7b": { + "max_tokens": 4096, + "max_input_tokens": 16384, + "max_output_tokens": 4096, + "input_cost_per_token": 6e-08, + "output_cost_per_token": 1.2e-07, + "litellm_provider": "llamagate", + "mode": "chat", + "supports_function_calling": true, + "supports_response_schema": true + }, + "llamagate/codellama-7b": { + "max_tokens": 4096, + "max_input_tokens": 16384, + "max_output_tokens": 4096, + "input_cost_per_token": 6e-08, + "output_cost_per_token": 1.2e-07, + "litellm_provider": "llamagate", + "mode": "chat", + "supports_function_calling": true, + "supports_response_schema": true + }, + "llamagate/qwen3-vl-8b": { + "max_tokens": 8192, + "max_input_tokens": 32768, + "max_output_tokens": 8192, + "input_cost_per_token": 1.5e-07, + "output_cost_per_token": 5.5e-07, + "litellm_provider": "llamagate", + "mode": "chat", + "supports_function_calling": true, + "supports_response_schema": true, + "supports_vision": true + }, + "llamagate/llava-7b": { + "max_tokens": 2048, + "max_input_tokens": 4096, + "max_output_tokens": 2048, + "input_cost_per_token": 1e-07, + "output_cost_per_token": 2e-07, + "litellm_provider": "llamagate", + "mode": "chat", + "supports_response_schema": true, + "supports_vision": true + }, + "llamagate/gemma3-4b": { + "max_tokens": 8192, + "max_input_tokens": 128000, + "max_output_tokens": 8192, + "input_cost_per_token": 3e-08, + "output_cost_per_token": 8e-08, + "litellm_provider": "llamagate", + "mode": "chat", + "supports_function_calling": true, + "supports_response_schema": true, + "supports_vision": true + }, + "llamagate/nomic-embed-text": 
{ + "max_tokens": 8192, + "max_input_tokens": 8192, + "input_cost_per_token": 2e-08, + "output_cost_per_token": 0, + "litellm_provider": "llamagate", + "mode": "embedding" + }, + "llamagate/qwen3-embedding-8b": { + "max_tokens": 40960, + "max_input_tokens": 40960, + "input_cost_per_token": 2e-08, + "output_cost_per_token": 0, + "litellm_provider": "llamagate", + "mode": "embedding" + }, + "sarvam/sarvam-m": { + "cache_creation_input_token_cost": 0, + "cache_creation_input_token_cost_above_1hr": 0, + "cache_read_input_token_cost": 0, + "input_cost_per_token": 0, + "litellm_provider": "sarvam", + "max_input_tokens": 8192, + "max_output_tokens": 32000, + "max_tokens": 32000, + "mode": "chat", + "output_cost_per_token": 0, + "supports_reasoning": true + }, + "tts-1-1106": { + "input_cost_per_character": 1.5e-05, + "litellm_provider": "openai", + "mode": "audio_speech", + "supported_endpoints": [ + "/v1/audio/speech" + ] + }, + "tts-1-hd-1106": { + "input_cost_per_character": 3e-05, + "litellm_provider": "openai", + "mode": "audio_speech", + "supported_endpoints": [ + "/v1/audio/speech" + ] + }, + "gpt-4o-mini-tts-2025-03-20": { + "input_cost_per_token": 2.5e-06, + "litellm_provider": "openai", + "mode": "audio_speech", + "output_cost_per_audio_token": 1.2e-05, + "output_cost_per_second": 0.00025, + "output_cost_per_token": 1e-05, + "supported_endpoints": [ + "/v1/audio/speech" + ], + "supported_modalities": [ + "text", + "audio" + ], + "supported_output_modalities": [ + "audio" + ] + }, + "gpt-4o-mini-tts-2025-12-15": { + "input_cost_per_token": 2.5e-06, + "litellm_provider": "openai", + "mode": "audio_speech", + "output_cost_per_audio_token": 1.2e-05, + "output_cost_per_second": 0.00025, + "output_cost_per_token": 1e-05, + "supported_endpoints": [ + "/v1/audio/speech" + ], + "supported_modalities": [ + "text", + "audio" + ], + "supported_output_modalities": [ + "audio" + ] + }, + "gpt-4o-mini-transcribe-2025-03-20": { + "input_cost_per_audio_token": 3e-06, + 
"input_cost_per_token": 1.25e-06, + "litellm_provider": "openai", + "max_input_tokens": 16000, + "max_output_tokens": 2000, + "mode": "audio_transcription", + "output_cost_per_token": 5e-06, + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] + }, + "gpt-4o-mini-transcribe-2025-12-15": { + "input_cost_per_audio_token": 3e-06, + "input_cost_per_token": 1.25e-06, + "litellm_provider": "openai", + "max_input_tokens": 16000, + "max_output_tokens": 2000, + "mode": "audio_transcription", + "output_cost_per_token": 5e-06, + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] + }, + "gpt-5-search-api": { + "cache_read_input_token_cost": 1.25e-07, + "input_cost_per_token": 1.25e-06, + "litellm_provider": "openai", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 1e-05, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_web_search": true, + "supports_none_reasoning_effort": false, + "supports_xhigh_reasoning_effort": false, + "supports_minimal_reasoning_effort": true + }, + "gpt-5-search-api-2025-10-14": { + "cache_read_input_token_cost": 1.25e-07, + "input_cost_per_token": 1.25e-06, + "litellm_provider": "openai", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 1e-05, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_web_search": true, + "supports_none_reasoning_effort": false, + "supports_xhigh_reasoning_effort": false + }, + 
"gpt-realtime-mini-2025-10-06": { + "cache_creation_input_audio_token_cost": 3e-07, + "cache_read_input_audio_token_cost": 3e-07, + "cache_read_input_token_cost": 6e-08, + "input_cost_per_audio_token": 1e-05, + "input_cost_per_image": 8e-07, + "input_cost_per_token": 6e-07, + "litellm_provider": "openai", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_audio_token": 2e-05, + "output_cost_per_token": 2.4e-06, + "supported_endpoints": [ + "/v1/realtime" + ], + "supported_modalities": [ + "text", + "image", + "audio" + ], + "supported_output_modalities": [ + "text", + "audio" + ], + "supports_audio_input": true, + "supports_audio_output": true, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "gpt-realtime-mini-2025-12-15": { + "cache_creation_input_audio_token_cost": 3e-07, + "cache_read_input_audio_token_cost": 3e-07, + "cache_read_input_token_cost": 6e-08, + "input_cost_per_audio_token": 1e-05, + "input_cost_per_image": 8e-07, + "input_cost_per_token": 6e-07, + "litellm_provider": "openai", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_audio_token": 2e-05, + "output_cost_per_token": 2.4e-06, + "supported_endpoints": [ + "/v1/realtime" + ], + "supported_modalities": [ + "text", + "image", + "audio" + ], + "supported_output_modalities": [ + "text", + "audio" + ], + "supports_audio_input": true, + "supports_audio_output": true, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "sora-2": { + "litellm_provider": "openai", + "mode": "video_generation", + "output_cost_per_video_per_second": 0.1, + "source": "https://platform.openai.com/docs/api-reference/videos", + "supported_modalities": [ + "text", + "image" + ], + 
"supported_output_modalities": [ + "video" + ], + "supported_resolutions": [ + "720x1280", + "1280x720" + ] + }, + "sora-2-pro": { + "litellm_provider": "openai", + "mode": "video_generation", + "output_cost_per_video_per_second": 0.3, + "source": "https://platform.openai.com/docs/api-reference/videos", + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "video" + ], + "supported_resolutions": [ + "720x1280", + "1280x720" + ] + }, + "sora-2-pro-high-res": { + "litellm_provider": "openai", + "mode": "video_generation", + "output_cost_per_video_per_second": 0.5, + "source": "https://platform.openai.com/docs/api-reference/videos", + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "video" + ], + "supported_resolutions": [ + "1024x1792", + "1792x1024" + ] + }, + "chatgpt-image-latest": { + "cache_read_input_image_token_cost": 2.5e-06, + "cache_read_input_token_cost": 1.25e-06, + "input_cost_per_image_token": 1e-05, + "input_cost_per_token": 5e-06, + "litellm_provider": "openai", + "mode": "image_generation", + "output_cost_per_image_token": 4e-05, + "supported_endpoints": [ + "/v1/images/generations", + "/v1/images/edits" + ] + }, + "gemini-2.0-flash-exp-image-generation": { + "input_cost_per_token": 0.0, + "litellm_provider": "gemini", + "max_images_per_prompt": 3000, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "image_generation", + "output_cost_per_image": 0.039, + "output_cost_per_token": 0.0, + "source": "https://ai.google.dev/pricing", + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text", + "image" + ], + "supports_vision": true + }, + "gemini/gemini-2.0-flash-exp-image-generation": { + "input_cost_per_token": 0.0, + "litellm_provider": "gemini", + "max_images_per_prompt": 3000, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "image_generation", + 
"output_cost_per_image": 0.039, + "output_cost_per_token": 0.0, + "source": "https://ai.google.dev/pricing", + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text", + "image" + ], + "supports_vision": true, + "tpm": 250000, + "rpm": 10 + }, + "gemini/gemini-2.0-flash-lite-001": { + "cache_read_input_token_cost": 1.875e-08, + "deprecation_date": "2026-06-01", + "input_cost_per_audio_token": 7.5e-08, + "input_cost_per_token": 7.5e-08, + "litellm_provider": "gemini", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 8192, + "max_pdf_size_mb": 50, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_token": 3e-07, + "rpm": 4000, + "source": "https://ai.google.dev/gemini-api/docs/pricing#gemini-2.0-flash-lite", + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], + "supports_audio_output": true, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_web_search": true, + "tpm": 4000000, + "search_context_cost_per_query": { + "search_context_size_low": 0.035, + "search_context_size_medium": 0.035, + "search_context_size_high": 0.035 + } + }, + "gemini-2.5-flash-native-audio-latest": { + "input_cost_per_audio_token": 1e-06, + "input_cost_per_token": 3e-07, + "litellm_provider": "gemini", + "max_input_tokens": 1048576, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 2.5e-06, + "source": "https://ai.google.dev/pricing", + "supported_endpoints": [ + "/v1/realtime" + ], + "supported_modalities": [ + "text", + "audio" + ], + "supported_output_modalities": [ + "text", + "audio" + ], + "supports_audio_input": true, + 
"supports_audio_output": true + }, + "gemini-2.5-flash-native-audio-preview-09-2025": { + "input_cost_per_audio_token": 1e-06, + "input_cost_per_token": 3e-07, + "litellm_provider": "gemini", + "max_input_tokens": 1048576, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 2.5e-06, + "source": "https://ai.google.dev/pricing", + "supported_endpoints": [ + "/v1/realtime" + ], + "supported_modalities": [ + "text", + "audio" + ], + "supported_output_modalities": [ + "text", + "audio" + ], + "supports_audio_input": true, + "supports_audio_output": true + }, + "gemini-2.5-flash-native-audio-preview-12-2025": { + "input_cost_per_audio_token": 1e-06, + "input_cost_per_token": 3e-07, + "litellm_provider": "gemini", + "max_input_tokens": 1048576, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 2.5e-06, + "source": "https://ai.google.dev/pricing", + "supported_endpoints": [ + "/v1/realtime" + ], + "supported_modalities": [ + "text", + "audio" + ], + "supported_output_modalities": [ + "text", + "audio" + ], + "supports_audio_input": true, + "supports_audio_output": true + }, + "gemini-3.1-flash-live-preview": { + "input_cost_per_audio_token": 3e-06, + "input_cost_per_image_token": 1e-06, + "input_cost_per_token": 7.5e-07, + "input_cost_per_video_per_second": 3.3333333333333335e-05, + "litellm_provider": "gemini", + "max_input_tokens": 131072, + "max_output_tokens": 65536, + "max_tokens": 65536, + "mode": "chat", + "output_cost_per_audio_token": 1.2e-05, + "output_cost_per_token": 4.5e-06, + "source": "https://ai.google.dev/gemini-api/docs/pricing", + "supported_endpoints": [ + "/v1/realtime" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text", + "audio" + ], + "supports_audio_input": true, + "supports_audio_output": true, + "supports_function_calling": true, + "supports_vision": true, + "supports_web_search": true 
+ }, + "gemini/gemini-2.5-flash-native-audio-latest": { + "input_cost_per_audio_token": 1e-06, + "input_cost_per_token": 3e-07, + "litellm_provider": "gemini", + "max_input_tokens": 1048576, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 2.5e-06, + "source": "https://ai.google.dev/pricing", + "supported_endpoints": [ + "/v1/realtime" + ], + "supported_modalities": [ + "text", + "audio" + ], + "supported_output_modalities": [ + "text", + "audio" + ], + "supports_audio_input": true, + "supports_audio_output": true, + "tpm": 250000, + "rpm": 10 + }, + "gemini/gemini-2.5-flash-native-audio-preview-09-2025": { + "input_cost_per_audio_token": 1e-06, + "input_cost_per_token": 3e-07, + "litellm_provider": "gemini", + "max_input_tokens": 1048576, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 2.5e-06, + "source": "https://ai.google.dev/pricing", + "supported_endpoints": [ + "/v1/realtime" + ], + "supported_modalities": [ + "text", + "audio" + ], + "supported_output_modalities": [ + "text", + "audio" + ], + "supports_audio_input": true, + "supports_audio_output": true, + "tpm": 250000, + "rpm": 10 + }, + "gemini/gemini-2.5-flash-native-audio-preview-12-2025": { + "input_cost_per_audio_token": 1e-06, + "input_cost_per_token": 3e-07, + "litellm_provider": "gemini", + "max_input_tokens": 1048576, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 2.5e-06, + "source": "https://ai.google.dev/pricing", + "supported_endpoints": [ + "/v1/realtime" + ], + "supported_modalities": [ + "text", + "audio" + ], + "supported_output_modalities": [ + "text", + "audio" + ], + "supports_audio_input": true, + "supports_audio_output": true, + "tpm": 250000, + "rpm": 10 + }, + "gemini/gemini-3.1-flash-live-preview": { + "input_cost_per_audio_token": 3e-06, + "input_cost_per_image_token": 1e-06, + "input_cost_per_token": 7.5e-07, + 
"input_cost_per_video_per_second": 3.3333333333333335e-05, + "litellm_provider": "gemini", + "max_input_tokens": 131072, + "max_output_tokens": 65536, + "max_tokens": 65536, + "mode": "chat", + "output_cost_per_audio_token": 1.2e-05, + "output_cost_per_token": 4.5e-06, + "source": "https://ai.google.dev/gemini-api/docs/pricing", + "supported_endpoints": [ + "/v1/realtime" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text", + "audio" + ], + "supports_audio_input": true, + "supports_audio_output": true, + "supports_function_calling": true, + "supports_vision": true, + "supports_web_search": true, + "tpm": 250000, + "rpm": 10 + }, + "gemini-2.5-flash-preview-tts": { + "input_cost_per_token": 3e-07, + "litellm_provider": "gemini", + "mode": "audio_speech", + "output_cost_per_token": 2.5e-06, + "source": "https://ai.google.dev/pricing", + "supported_endpoints": [ + "/v1/audio/speech" + ] + }, + "gemini-flash-latest": { + "cache_read_input_token_cost": 3e-08, + "input_cost_per_audio_token": 1e-06, + "input_cost_per_token": 3e-07, + "litellm_provider": "gemini", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 65535, + "max_pdf_size_mb": 30, + "max_tokens": 65535, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_reasoning_token": 2.5e-06, + "output_cost_per_token": 2.5e-06, + "rpm": 100000, + "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview", + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], + "supports_audio_output": false, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": 
true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_url_context": true, + "supports_vision": true, + "supports_web_search": true, + "tpm": 8000000, + "search_context_cost_per_query": { + "search_context_size_low": 0.035, + "search_context_size_medium": 0.035, + "search_context_size_high": 0.035 + } + }, + "gemini-flash-lite-latest": { + "cache_read_input_token_cost": 1e-08, + "input_cost_per_audio_token": 3e-07, + "input_cost_per_token": 1e-07, + "litellm_provider": "gemini", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 65535, + "max_pdf_size_mb": 30, + "max_tokens": 65535, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_reasoning_token": 4e-07, + "output_cost_per_token": 4e-07, + "rpm": 15, + "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-lite", + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], + "supports_audio_output": false, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_url_context": true, + "supports_vision": true, + "supports_web_search": true, + "tpm": 250000, + "search_context_cost_per_query": { + "search_context_size_low": 0.035, + "search_context_size_medium": 0.035, + "search_context_size_high": 0.035 + } + }, + "gemini-pro-latest": { + "cache_read_input_token_cost": 1.25e-07, + "cache_read_input_token_cost_above_200k_tokens": 2.5e-07, + "input_cost_per_token": 1.25e-06, + 
"input_cost_per_token_above_200k_tokens": 2.5e-06, + "litellm_provider": "gemini", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 65535, + "max_pdf_size_mb": 30, + "max_tokens": 65535, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_token": 1e-05, + "output_cost_per_token_above_200k_tokens": 1.5e-05, + "rpm": 2000, + "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing", + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], + "supports_audio_input": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_video_input": true, + "supports_vision": true, + "supports_web_search": true, + "tpm": 800000, + "search_context_cost_per_query": { + "search_context_size_low": 0.035, + "search_context_size_medium": 0.035, + "search_context_size_high": 0.035 + } + }, + "gemini/gemini-pro-latest": { + "cache_read_input_token_cost": 1.25e-07, + "cache_read_input_token_cost_above_200k_tokens": 2.5e-07, + "input_cost_per_token": 1.25e-06, + "input_cost_per_token_above_200k_tokens": 2.5e-06, + "litellm_provider": "gemini", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 65535, + "max_pdf_size_mb": 30, + "max_tokens": 65535, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_token": 1e-05, + "output_cost_per_token_above_200k_tokens": 1.5e-05, + "rpm": 2000, + "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing", + 
"supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], + "supports_audio_input": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_video_input": true, + "supports_vision": true, + "supports_web_search": true, + "tpm": 800000, + "search_context_cost_per_query": { + "search_context_size_low": 0.035, + "search_context_size_medium": 0.035, + "search_context_size_high": 0.035 + } + }, + "gemini-exp-1206": { + "cache_read_input_token_cost": 3e-08, + "input_cost_per_audio_token": 1e-06, + "input_cost_per_token": 3e-07, + "litellm_provider": "gemini", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 65535, + "max_pdf_size_mb": 30, + "max_tokens": 65535, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_reasoning_token": 2.5e-06, + "output_cost_per_token": 2.5e-06, + "rpm": 100000, + "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview", + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], + "supports_audio_output": false, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_url_context": true, + "supports_vision": true, + "supports_web_search": true, + "tpm": 
8000000, + "search_context_cost_per_query": { + "search_context_size_low": 0.035, + "search_context_size_medium": 0.035, + "search_context_size_high": 0.035 + } + }, + "vertex_ai/claude-sonnet-4-6@default": { + "cache_creation_input_token_cost": 3.75e-06, + "cache_read_input_token_cost": 3e-07, + "input_cost_per_token": 3e-06, + "litellm_provider": "vertex_ai-anthropic_models", + "max_input_tokens": 1000000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 1.5e-05, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_max_reasoning_effort": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 346, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_minimal_reasoning_effort": true + }, + "duckduckgo/search": { + "litellm_provider": "duckduckgo", + "mode": "search", + "input_cost_per_query": 0.0, + "metadata": { + "notes": "DuckDuckGo Instant Answer API is free and does not require an API key." 
+ } + }, + "bedrock_mantle/openai.gpt-oss-120b": { + "input_cost_per_token": 1.5e-07, + "output_cost_per_token": 6e-07, + "litellm_provider": "bedrock_mantle", + "max_input_tokens": 131072, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "bedrock_mantle/openai.gpt-oss-20b": { + "input_cost_per_token": 7.5e-08, + "output_cost_per_token": 3e-07, + "litellm_provider": "bedrock_mantle", + "max_input_tokens": 131072, + "max_output_tokens": 32768, + "max_tokens": 32768, + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "bedrock_mantle/openai.gpt-oss-safeguard-120b": { + "input_cost_per_token": 1.5e-07, + "output_cost_per_token": 6e-07, + "litellm_provider": "bedrock_mantle", + "max_input_tokens": 131072, + "max_output_tokens": 65536, + "max_tokens": 65536, + "mode": "chat", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "bedrock_mantle/openai.gpt-oss-safeguard-20b": { + "input_cost_per_token": 7.5e-08, + "output_cost_per_token": 3e-07, + "litellm_provider": "bedrock_mantle", + "max_input_tokens": 131072, + "max_output_tokens": 65536, + "max_tokens": 65536, + "mode": "chat", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "volcengine/doubao-seed-2-0-pro-260215": { + "litellm_provider": "volcengine", + "max_input_tokens": 256000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "source": "https://www.volcengine.com/docs/82379/1330310", + "supports_function_calling": true, + "supports_reasoning": true, + 
"supports_tool_choice": false, + "supports_vision": true, + "tiered_pricing": [ + { + "input_cost_per_token": 4.6e-07, + "output_cost_per_token": 2.3e-06, + "range": [ + 0, + 32000.0 + ] + }, + { + "input_cost_per_token": 7e-07, + "output_cost_per_token": 3.5e-06, + "range": [ + 32000.0, + 128000.0 + ] + }, + { + "input_cost_per_token": 1.4e-06, + "output_cost_per_token": 7e-06, + "range": [ + 128000.0, + 256000.0 + ] + } + ] + }, + "volcengine/doubao-seed-2-0-lite-260215": { + "litellm_provider": "volcengine", + "max_input_tokens": 256000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "source": "https://www.volcengine.com/docs/82379/1330310", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": false, + "supports_vision": true, + "tiered_pricing": [ + { + "input_cost_per_token": 8.7e-08, + "output_cost_per_token": 5.2e-07, + "range": [ + 0, + 32000.0 + ] + }, + { + "input_cost_per_token": 1.3e-07, + "output_cost_per_token": 7.8e-07, + "range": [ + 32000.0, + 128000.0 + ] + }, + { + "input_cost_per_token": 2.6e-07, + "output_cost_per_token": 1.6e-06, + "range": [ + 128000.0, + 256000.0 + ] + } + ] + }, + "volcengine/doubao-seed-2-0-mini-260215": { + "litellm_provider": "volcengine", + "max_input_tokens": 256000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "source": "https://www.volcengine.com/docs/82379/1330310", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": false, + "supports_vision": true, + "tiered_pricing": [ + { + "input_cost_per_token": 2.9e-08, + "output_cost_per_token": 2.9e-07, + "range": [ + 0, + 32000.0 + ] + }, + { + "input_cost_per_token": 5.8e-08, + "output_cost_per_token": 5.8e-07, + "range": [ + 32000.0, + 128000.0 + ] + }, + { + "input_cost_per_token": 1.2e-07, + "output_cost_per_token": 1.2e-06, + "range": [ + 128000.0, + 256000.0 + ] + } + ] + }, + "volcengine/doubao-seed-2-0-code-preview-260215": { + 
"litellm_provider": "volcengine", + "max_input_tokens": 256000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "source": "https://www.volcengine.com/docs/82379/1330310", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": false, + "supports_vision": true, + "tiered_pricing": [ + { + "input_cost_per_token": 4.6e-07, + "output_cost_per_token": 2.3e-06, + "range": [ + 0, + 32000.0 + ] + }, + { + "input_cost_per_token": 7e-07, + "output_cost_per_token": 3.5e-06, + "range": [ + 32000.0, + 128000.0 + ] + }, + { + "input_cost_per_token": 1.4e-06, + "output_cost_per_token": 7e-06, + "range": [ + 128000.0, + 256000.0 + ] + } + ] + }, + "zai.glm-5": { + "input_cost_per_token": 1e-06, + "output_cost_per_token": 3.2e-06, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 200000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "source": "https://aws.amazon.com/bedrock/pricing/" + }, + "bedrock/us-east-1/zai.glm-5": { + "input_cost_per_token": 1e-06, + "output_cost_per_token": 3.2e-06, + "litellm_provider": "bedrock", + "max_input_tokens": 200000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "source": "https://aws.amazon.com/bedrock/pricing/" + }, + "bedrock/us-west-2/zai.glm-5": { + "input_cost_per_token": 1e-06, + "output_cost_per_token": 3.2e-06, + "litellm_provider": "bedrock", + "max_input_tokens": 200000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "source": "https://aws.amazon.com/bedrock/pricing/" + }, + 
"minimax.minimax-m2.5": { + "input_cost_per_token": 3e-07, + "output_cost_per_token": 1.2e-06, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 1000000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "supports_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "source": "https://aws.amazon.com/bedrock/pricing/" + }, + "bedrock/us-east-1/minimax.minimax-m2.5": { + "input_cost_per_token": 3e-07, + "output_cost_per_token": 1.2e-06, + "litellm_provider": "bedrock", + "max_input_tokens": 1000000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "supports_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "source": "https://aws.amazon.com/bedrock/pricing/" + }, + "bedrock/us-west-2/minimax.minimax-m2.5": { + "input_cost_per_token": 3e-07, + "output_cost_per_token": 1.2e-06, + "litellm_provider": "bedrock", + "max_input_tokens": 1000000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "supports_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "source": "https://aws.amazon.com/bedrock/pricing/" + }, + "bedrock/us-gov-east-1/anthropic.claude-haiku-4-5-20251001-v1:0": { + "cache_creation_input_token_cost": 1.5e-06, + "cache_read_input_token_cost": 1.2e-07, + "input_cost_per_token": 1.2e-06, + "litellm_provider": "bedrock", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 6e-06, + "source": "https://aws.amazon.com/about-aws/whats-new/2025/10/claude-4-5-haiku-anthropic-amazon-bedrock", + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 
346, + "supports_native_structured_output": true, + "supports_pdf_input": true + }, + "bedrock/us-gov-west-1/anthropic.claude-haiku-4-5-20251001-v1:0": { + "cache_creation_input_token_cost": 1.5e-06, + "cache_read_input_token_cost": 1.2e-07, + "input_cost_per_token": 1.2e-06, + "litellm_provider": "bedrock", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 6e-06, + "source": "https://aws.amazon.com/about-aws/whats-new/2025/10/claude-4-5-haiku-anthropic-amazon-bedrock", + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 346, + "supports_native_structured_output": true, + "supports_pdf_input": true + } +} From 25e1ff585afe19566605517ce75bbba359d004ef Mon Sep 17 00:00:00 2001 From: youngsu5582 <98307410+youngsu5582@users.noreply.github.com> Date: Thu, 21 May 2026 23:09:11 +0900 Subject: [PATCH 06/17] =?UTF-8?q?feat:=20ChatClient=20=EC=9D=98=20?= =?UTF-8?q?=EC=82=AC=EC=A0=84=20=ED=94=84=EB=A1=AC=ED=94=84=ED=8A=B8=20?= =?UTF-8?q?=EC=84=B8=ED=8C=85?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 시스템 프롬프트 + layer1, layer2 데이터 주입 - 진행 결과 progress md 로 정리 --- .gitignore | 6 +- progress.md | 49 ++++++++++++ .../bootcamp/config/ChatClientConfig.java | 75 ++++++++++++++++++- src/main/resources/prompts/faq-system.st | 21 ++++++ 4 files changed, 148 insertions(+), 3 deletions(-) create mode 100644 progress.md create mode 100644 src/main/resources/prompts/faq-system.st diff --git a/.gitignore b/.gitignore index 1c4fe0c..aca6b8d 100644 --- a/.gitignore +++ b/.gitignore @@ -41,4 +41,8 @@ __pycache__/ # Matrix metadata **/.omc/ -src/main/resources/application-*.yml \ No newline at end of file 
+src/main/resources/application-*.yml + +src/main/resources/layer1_faq/ +src/main/resources/layer2_policies/ +src/main/resources/layer3_examples/ \ No newline at end of file diff --git a/progress.md b/progress.md new file mode 100644 index 0000000..a306817 --- /dev/null +++ b/progress.md @@ -0,0 +1,49 @@ +## 진행도 + +### 단순 사용자 프롬프트만 주입 + +``` +=== 평가 결과 === +전체: 14/150 (9.3%) + +난이도별: + easy : 2/30 (7%) + hard : 2/26 (8%) + medium : 10/94 (11%) +``` + +### 시스템 프롬프트 주입 + +``` +=== 평가 결과 === +전체: 6/150 (4.0%) + +난이도별: + easy : 0/30 (0%) + hard : 0/26 (0%) + medium : 6/94 (6%) +``` + +### 시스템 프롬프트 + 파라미터 주입 + +``` +=== 평가 결과 === +전체: 39/150 (26.0%) + +난이도별: + easy : 12/30 (40%) + hard : 2/26 (8%) + medium : 25/94 (27%) +``` + +``` +=== 평가 결과 === +전체: 40/150 (26.7%) + +난이도별: + easy : 11/30 (37%) + hard : 2/26 (8%) + medium : 27/94 (29%) +``` + +적당히 한계가 존재한다.. diff --git a/src/main/java/com/cholog/bootcamp/config/ChatClientConfig.java b/src/main/java/com/cholog/bootcamp/config/ChatClientConfig.java index b3a4ce2..9bbc540 100644 --- a/src/main/java/com/cholog/bootcamp/config/ChatClientConfig.java +++ b/src/main/java/com/cholog/bootcamp/config/ChatClientConfig.java @@ -1,16 +1,87 @@ package com.cholog.bootcamp.config; import lombok.NoArgsConstructor; +import lombok.extern.slf4j.Slf4j; import org.springframework.ai.chat.client.ChatClient; +import org.springframework.beans.factory.annotation.Value; import org.springframework.context.annotation.Bean; import org.springframework.context.annotation.Configuration; +import org.springframework.core.io.Resource; +import java.nio.charset.StandardCharsets; +import java.util.Map; + +/** + * FAQ 챗봇용 {@link ChatClient} 구성. + * + *

3계층 지식 베이스를 system prompt에 임베드한다: + *

+ * + *

의도적으로 제외된 자료: + *

+ */ +@Slf4j @Configuration @NoArgsConstructor public class ChatClientConfig { @Bean - public ChatClient chatClient(ChatClient.Builder builder) { - return builder.build(); + public ChatClient chatClient(ChatClient.Builder builder, + @Value("classpath:prompts/faq-system.st") Resource systemTemplateResource, + @Value("classpath:layer1_faq/*.md") Resource[] faqResources, + @Value("classpath:layer2_policies/current/*.md") Resource[] policyResources, + @Value("classpath:layer3_examples/*.md") Resource[] exampleResources) { + + var systemTemplate = loadSystemTemplate(systemTemplateResource); + var faq = concatResources(faqResources, "faq"); + var policies = concatResources(policyResources, "policies"); + var examples = concatResources(exampleResources, "examples"); + + return builder + .defaultSystem(spec -> spec + .text(systemTemplate) + .params(Map.of( + "faq", faq, + "policies", policies, + "examples", examples))) + .build(); + } + + private String loadSystemTemplate(Resource systemTemplate) { + try { + String content = systemTemplate.getContentAsString(StandardCharsets.UTF_8); + log.info("faq 시스템 프롬프트를 로드합니다. 길이: {}", content.length()); + return content; + } catch (Exception e) { + log.warn("faq 시스템 프롬프트 로드중 문제가 발생했습니다.", e); + return ""; + } + } + + private String concatResources(Resource[] resources, String label) { + try { + log.info("{} layer 파일을 로드합니다. 
개수: {}", label, resources.length); + + var sb = new StringBuilder(); + for (Resource r : resources) { + sb.append("## ").append(r.getFilename()).append(System.lineSeparator()); + sb.append(r.getContentAsString(StandardCharsets.UTF_8)); + sb.append(System.lineSeparator()).append(System.lineSeparator()); + sb.append("---"); + sb.append(System.lineSeparator()).append(System.lineSeparator()); + } + return sb.toString(); + } catch (Exception e) { + log.warn("{} layer 로드중 문제가 발생했습니다.", label, e); + return ""; + } } } diff --git a/src/main/resources/prompts/faq-system.st b/src/main/resources/prompts/faq-system.st new file mode 100644 index 0000000..4a6ebc7 --- /dev/null +++ b/src/main/resources/prompts/faq-system.st @@ -0,0 +1,21 @@ +You are a customer support assistant for an e-commerce platform. +You answer questions based on the knowledge base provided below. + +답변 규칙: +1. 사용자가 한국어로 질문하면 한국어로, 영어로 질문하면 영어로 답하세요. +2. 자료의 우선순위: **Policies > FAQ > Examples**. + - Policies(현행 정책)와 FAQ가 충돌하면 Policies를 따르세요. + - Examples(과거 상담 예시)는 답변 톤/구조 참고용일 뿐, 정책의 근거로 삼지 마세요. +3. 자료에 명시적으로 있는 정보만 사용하세요. 추측하거나 일반 상식으로 보충하지 마세요. +4. 자료에 없는 질문이면 "해당 정보는 자료에서 확인되지 않습니다. 고객지원으로 문의해주세요."라고만 답하세요. +5. 답변은 핵심만 2-4문장 이내로 간결하게. 불필요한 인사말, 사과, 부연 설명 금지. +6. 정책/숫자/기간 같은 구체값은 원문 그대로 인용하세요. 임의로 바꾸지 마세요. + +# 1. Public FAQ +{faq} + +# 2. Current Policies (authoritative) +{policies} + +# 3. 
Past Conversation Examples (style reference only) +{examples} From 9b236914cea105b297feab3d503b84dceed78599 Mon Sep 17 00:00:00 2001 From: youngsu5582 <98307410+youngsu5582@users.noreply.github.com> Date: Wed, 13 May 2026 21:05:33 +0900 Subject: [PATCH 07/17] =?UTF-8?q?feat:=20=ED=94=84=EB=A1=AC=ED=94=84?= =?UTF-8?q?=ED=8A=B8=EB=82=B4=20=ED=9A=8C=ED=94=BC=20=EA=B7=9C=EC=B9=99=20?= =?UTF-8?q?=EC=A0=9C=EA=B1=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 정확도 다소 상승(Easy 는 일정치 도달) --- progress.md | 32 ++++++++++++++++++++++++ src/main/resources/prompts/faq-system.st | 9 +++++-- 2 files changed, 39 insertions(+), 2 deletions(-) diff --git a/progress.md b/progress.md index a306817..145e581 100644 --- a/progress.md +++ b/progress.md @@ -47,3 +47,35 @@ ``` 적당히 한계가 존재한다.. + +--- + +### 프롬프트 튜닝 + +``` +=== 평가 결과 === +전체: 69/150 (46.0%) + +난이도별: + easy : 13/30 (43%) + hard : 10/26 (38%) + medium : 46/94 (49%) +``` + +``` +=== 평가 결과 === +전체: 69/150 (46.0%) + +난이도별: + easy : 16/30 (53%) + hard : 11/26 (42%) + medium : 42/94 (45%) +``` + +회피 규칙 제거 +(정보가 있으면, 단순 라우팅이 아니라 정보 응답하게 명시) + +Easy 는 한계 도달 + +--- + diff --git a/src/main/resources/prompts/faq-system.st b/src/main/resources/prompts/faq-system.st index 4a6ebc7..9936619 100644 --- a/src/main/resources/prompts/faq-system.st +++ b/src/main/resources/prompts/faq-system.st @@ -7,8 +7,13 @@ You answer questions based on the knowledge base provided below. - Policies(현행 정책)와 FAQ가 충돌하면 Policies를 따르세요. - Examples(과거 상담 예시)는 답변 톤/구조 참고용일 뿐, 정책의 근거로 삼지 마세요. 3. 자료에 명시적으로 있는 정보만 사용하세요. 추측하거나 일반 상식으로 보충하지 마세요. -4. 자료에 없는 질문이면 "해당 정보는 자료에서 확인되지 않습니다. 고객지원으로 문의해주세요."라고만 답하세요. -5. 답변은 핵심만 2-4문장 이내로 간결하게. 불필요한 인사말, 사과, 부연 설명 금지. +4. 자료에 명확한 답이 있으면 **자신감 있게 답하세요**. + 회피 응답("고객지원으로 문의해주세요")은 자료의 FAQ·Policies 모든 섹션을 검토한 뒤에도 관련 정보를 전혀 찾을 수 없을 때만 사용하세요. + 비슷하거나 부분적인 정보가 있으면 "정확한 X는 자료에 없지만, 관련하여 Y는 다음과 같습니다"처럼 부분 정보라도 제공하세요. +5. 답변은 자료에 있는 **모든 관련 조건과 예외를 포함**하여 작성하세요. 
+ - 등급별(standard/Plus/VIP)/케이스별/예외 조건이 있으면 빠짐없이 명시 + - 정책에 명시된 추가 조건(예: 미개봉, 시한, 제외 지역, 영업일 기준)을 함께 답변 + - 길이는 정확성을 위해 필요한 만큼. 불필요한 인사말/사과만 금지. 6. 정책/숫자/기간 같은 구체값은 원문 그대로 인용하세요. 임의로 바꾸지 마세요. # 1. Public FAQ From 9573335fbabcc624144fa220f06384ca04990969 Mon Sep 17 00:00:00 2001 From: youngsu5582 <98307410+youngsu5582@users.noreply.github.com> Date: Sun, 17 May 2026 15:39:36 +0900 Subject: [PATCH 08/17] =?UTF-8?q?feat:=20RAG=20=EC=9C=84=ED=95=9C=20Vector?= =?UTF-8?q?=20Store=20=EC=B6=94=EA=B0=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - SimpleVectorStore 통한 로컬 세팅 --- build.gradle | 1 + docs/RAG.MD | 25 ++++++++ .../bootcamp/config/VectorStoreConfig.java | 59 +++++++++++++++++++ .../bootcamp/controller/VectorController.java | 28 +++++++++ .../bootcamp/dto/VectorResponseDto.java | 20 +++++++ .../bootcamp/service/VectorService.java | 23 ++++++++ 6 files changed, 156 insertions(+) create mode 100644 docs/RAG.MD create mode 100644 src/main/java/com/cholog/bootcamp/config/VectorStoreConfig.java create mode 100644 src/main/java/com/cholog/bootcamp/controller/VectorController.java create mode 100644 src/main/java/com/cholog/bootcamp/dto/VectorResponseDto.java create mode 100644 src/main/java/com/cholog/bootcamp/service/VectorService.java diff --git a/build.gradle b/build.gradle index 71883d3..8a405d3 100644 --- a/build.gradle +++ b/build.gradle @@ -26,6 +26,7 @@ dependencyManagement { dependencies { implementation 'org.springframework.boot:spring-boot-starter-web' implementation 'org.springframework.ai:spring-ai-starter-model-openai' + implementation 'org.springframework.ai:spring-ai-rag' compileOnly 'org.projectlombok:lombok' testImplementation 'org.springframework.boot:spring-boot-starter-test' diff --git a/docs/RAG.MD b/docs/RAG.MD new file mode 100644 index 0000000..6bdff52 --- /dev/null +++ b/docs/RAG.MD @@ -0,0 +1,25 @@ +## RAG + +Retrieval-Augmented Generation + +질문과 관련된 문서 조각만 검색을 해, LLM 컨텍스트에 주입하는 패턴 + +### 전체 문서 
전달 + +문서를 전체 LIST 로 전달하면, 컨텍스트가 폭발하게 된다. +노이즈가 많기 때문에 방향을 잃게 된다. - lost in the middle + +> lost in the middle +> 컨텍스트는 앞/뒤는 잘 기억하지만, 중간은 흐릿하게 기억한다. + +일정 수치를 넘어가면, 한계에 도달한다.(토큰 비용 역시 증가) + +이러한 문제를 해결하기 위해 RAG 를 사용한다. + +- 검색 단계에서 무관한 문서 제거, 컨텍스트에 관련성 높은 요소들만 전달 + (LLM 도 관련 요소만 받기에, 정확도 상승) + +- 입력 토큰이 일정하다 + +질문당, top-n 개의 요소만 선택해서 주입한다. +문서가 늘어나더라도 검색 후에는 동일하다. \ No newline at end of file diff --git a/src/main/java/com/cholog/bootcamp/config/VectorStoreConfig.java b/src/main/java/com/cholog/bootcamp/config/VectorStoreConfig.java new file mode 100644 index 0000000..8c397af --- /dev/null +++ b/src/main/java/com/cholog/bootcamp/config/VectorStoreConfig.java @@ -0,0 +1,59 @@ +package com.cholog.bootcamp.config; + +import jakarta.annotation.PostConstruct; +import lombok.extern.slf4j.Slf4j; +import org.springframework.ai.document.Document; +import org.springframework.ai.embedding.EmbeddingModel; +import org.springframework.ai.transformer.splitter.TokenTextSplitter; +import org.springframework.ai.vectorstore.SimpleVectorStore; +import org.springframework.ai.vectorstore.VectorStore; +import org.springframework.beans.factory.annotation.Value; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; +import org.springframework.core.io.Resource; + +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Map; + +@Slf4j +@Configuration +public class VectorStoreConfig { + + @Bean + public VectorStore vectorStore(EmbeddingModel model, + @Value("classpath:prompts/faq-system.st") Resource systemTemplateResource, + @Value("classpath:layer1_faq/*.md") Resource[] faqResources, + @Value("classpath:layer2_policies/current/*.md") Resource[] policyResources, + @Value("classpath:layer3_examples/*.md") Resource[] exampleResources) { + var store = SimpleVectorStore.builder(model).build(); + + var documents = new 
ArrayList(); + documents.addAll(toDocuments(faqResources, "faq")); + documents.addAll(toDocuments(policyResources, "policy")); + documents.addAll(toDocuments(exampleResources, "example")); + + var chunks = new TokenTextSplitter().apply(documents); + + store.add(chunks); + log.info("vector store 적재 완료. 원본 {} -> 청크 {}", documents.size(), chunks.size()); + return store; + } + + private List toDocuments(Resource[] resources, String layer) { + return Arrays.stream(resources) + .map(r -> { + try { + return new Document( + r.getContentAsString(StandardCharsets.UTF_8), + Map.of("source", r.getFilename(), "layer", layer)); + } catch (IOException e) { + throw new IllegalStateException("리소스 로드 실패: " + r.getFilename(), e); + } + }) + .toList(); + } +} diff --git a/src/main/java/com/cholog/bootcamp/controller/VectorController.java b/src/main/java/com/cholog/bootcamp/controller/VectorController.java new file mode 100644 index 0000000..a356785 --- /dev/null +++ b/src/main/java/com/cholog/bootcamp/controller/VectorController.java @@ -0,0 +1,28 @@ +package com.cholog.bootcamp.controller; + +import com.cholog.bootcamp.dto.VectorResponseDto; +import com.cholog.bootcamp.service.VectorService; +import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; +import org.springframework.http.ResponseEntity; +import org.springframework.web.bind.annotation.*; + +import java.util.List; + +@RestController +@RequiredArgsConstructor +@RequestMapping("/api/vector") +@Slf4j +public class VectorController { + + private final VectorService vectorService; + + @GetMapping + public ResponseEntity> question( + @RequestParam String keyword + ) { + log.info("Vector 요청이 들어왔습니다. 
{}", keyword); + var response = vectorService.request(keyword); + return ResponseEntity.ok(response); + } +} diff --git a/src/main/java/com/cholog/bootcamp/dto/VectorResponseDto.java b/src/main/java/com/cholog/bootcamp/dto/VectorResponseDto.java new file mode 100644 index 0000000..58f0944 --- /dev/null +++ b/src/main/java/com/cholog/bootcamp/dto/VectorResponseDto.java @@ -0,0 +1,20 @@ +package com.cholog.bootcamp.dto; + +import org.springframework.ai.document.Document; + +import java.util.Map; + +public record VectorResponseDto( + Double score, + String text, + Map metadata +) { + + public static VectorResponseDto from(Document document) { + return new VectorResponseDto( + document.getScore(), + document.getText(), + document.getMetadata() + ); + } +} diff --git a/src/main/java/com/cholog/bootcamp/service/VectorService.java b/src/main/java/com/cholog/bootcamp/service/VectorService.java new file mode 100644 index 0000000..718f226 --- /dev/null +++ b/src/main/java/com/cholog/bootcamp/service/VectorService.java @@ -0,0 +1,23 @@ +package com.cholog.bootcamp.service; + +import com.cholog.bootcamp.dto.VectorResponseDto; +import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; +import org.springframework.ai.vectorstore.VectorStore; +import org.springframework.stereotype.Service; + +import java.util.List; + +@Service +@RequiredArgsConstructor +@Slf4j +public class VectorService { + + private final VectorStore vectorStore; + + public List request(String keyword) { + var hits = vectorStore.similaritySearch(keyword); + log.info("{} 에 대한 결과: {}개", keyword, hits.size()); + return hits.stream().map(VectorResponseDto::from).toList(); + } +} From eddc0dc230acb58589bd6a337b02b14aae4269a9 Mon Sep 17 00:00:00 2001 From: youngsu5582 <98307410+youngsu5582@users.noreply.github.com> Date: Tue, 19 May 2026 22:16:37 +0900 Subject: [PATCH 09/17] =?UTF-8?q?feat:=20=EB=AC=B8=EC=84=9C=20RAG=20?= =?UTF-8?q?=EB=A1=9C=20=EB=B6=84=EC=84=9D=20=EB=B0=8F=20RAG=20=EB=A5=BC=20?= 
=?UTF-8?q?=ED=86=B5=ED=95=9C=20=EA=B2=80=EC=83=89?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - chunk 400 + temperature 0.2 로 설정 --- progress.md | 24 ++++++++++ .../bootcamp/config/ChatClientConfig.java | 6 +-- .../bootcamp/config/VectorStoreConfig.java | 17 +++++-- .../FrequentlyQuestionChatApiController.java | 8 ++-- .../FrequentlyQuestionChatApiService.java | 47 +++++++++++++++++++ src/main/resources/application.yml | 2 +- src/main/resources/prompts/faq-system.st | 42 ++++++++--------- 7 files changed, 110 insertions(+), 36 deletions(-) diff --git a/progress.md b/progress.md index 145e581..178f426 100644 --- a/progress.md +++ b/progress.md @@ -79,3 +79,27 @@ Easy 는 한계 도달 --- +### RAG 도입 + +``` +=== 평가 결과 === +전체: 82/150 (54.7%) + +난이도별: + easy : 16/30 (53%) + hard : 15/26 (58%) + medium : 51/94 (54%) +``` + +``` +=== 평가 결과 === +전체: 78/150 (52.0%) + +난이도별: + easy : 17/30 (57%) + hard : 10/26 (38%) + medium : 51/94 (54%) +``` + +chunk size 400 + temperature 0.2 로 설정 +아직 RAG 의 도입의 효과를 잘못 느끼겠다.. 
diff --git a/src/main/java/com/cholog/bootcamp/config/ChatClientConfig.java b/src/main/java/com/cholog/bootcamp/config/ChatClientConfig.java index 9bbc540..3a65af5 100644 --- a/src/main/java/com/cholog/bootcamp/config/ChatClientConfig.java +++ b/src/main/java/com/cholog/bootcamp/config/ChatClientConfig.java @@ -47,11 +47,7 @@ public ChatClient chatClient(ChatClient.Builder builder, return builder .defaultSystem(spec -> spec - .text(systemTemplate) - .params(Map.of( - "faq", faq, - "policies", policies, - "examples", examples))) + .text(systemTemplate)) .build(); } diff --git a/src/main/java/com/cholog/bootcamp/config/VectorStoreConfig.java b/src/main/java/com/cholog/bootcamp/config/VectorStoreConfig.java index 8c397af..bc74a99 100644 --- a/src/main/java/com/cholog/bootcamp/config/VectorStoreConfig.java +++ b/src/main/java/com/cholog/bootcamp/config/VectorStoreConfig.java @@ -1,6 +1,5 @@ package com.cholog.bootcamp.config; -import jakarta.annotation.PostConstruct; import lombok.extern.slf4j.Slf4j; import org.springframework.ai.document.Document; import org.springframework.ai.embedding.EmbeddingModel; @@ -25,7 +24,6 @@ public class VectorStoreConfig { @Bean public VectorStore vectorStore(EmbeddingModel model, - @Value("classpath:prompts/faq-system.st") Resource systemTemplateResource, @Value("classpath:layer1_faq/*.md") Resource[] faqResources, @Value("classpath:layer2_policies/current/*.md") Resource[] policyResources, @Value("classpath:layer3_examples/*.md") Resource[] exampleResources) { @@ -36,7 +34,20 @@ public VectorStore vectorStore(EmbeddingModel model, documents.addAll(toDocuments(policyResources, "policy")); documents.addAll(toDocuments(exampleResources, "example")); - var chunks = new TokenTextSplitter().apply(documents); + + TokenTextSplitter splitter = TokenTextSplitter.builder() + .withChunkSize(400) + .withMinChunkSizeChars(200) + .withMaxNumChunks(10000) + .withKeepSeparator(true) + .build(); + var chunks = splitter.apply(documents); + 
chunks.forEach(chunk -> { + if (chunk.getText() != null) { + log.info("chunk ID: {}, TEXT: {}", chunk.getId(), + chunk.getText().substring(0, Math.min(80, chunk.getText().length())).replace("\n", " ")); + } + }); store.add(chunks); log.info("vector store 적재 완료. 원본 {} -> 청크 {}", documents.size(), chunks.size()); diff --git a/src/main/java/com/cholog/bootcamp/controller/FrequentlyQuestionChatApiController.java b/src/main/java/com/cholog/bootcamp/controller/FrequentlyQuestionChatApiController.java index a3ab541..fd73932 100644 --- a/src/main/java/com/cholog/bootcamp/controller/FrequentlyQuestionChatApiController.java +++ b/src/main/java/com/cholog/bootcamp/controller/FrequentlyQuestionChatApiController.java @@ -4,8 +4,7 @@ import com.cholog.bootcamp.dto.FrequentlyQuestionChatResponseDto; import com.cholog.bootcamp.service.FrequentlyQuestionChatApiService; import lombok.RequiredArgsConstructor; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; +import lombok.extern.slf4j.Slf4j; import org.springframework.http.ResponseEntity; import org.springframework.web.bind.annotation.PostMapping; import org.springframework.web.bind.annotation.RequestBody; @@ -15,10 +14,9 @@ @RestController @RequestMapping("/api/chat") @RequiredArgsConstructor +@Slf4j public class FrequentlyQuestionChatApiController { - private static final Logger log = LoggerFactory.getLogger(FrequentlyQuestionChatApiController.class); - private final FrequentlyQuestionChatApiService frequentlyQuestionChatApiService; @PostMapping @@ -27,7 +25,7 @@ public ResponseEntity question( ) { log.info("FAQ 요청이 들어왔습니다. 
{}", dto.question()); // TODO 서비스 레이어 및 응답 구현 - var response = frequentlyQuestionChatApiService.chat(dto); + var response = frequentlyQuestionChatApiService.chatWithRag(dto); return ResponseEntity.ok(response); } } diff --git a/src/main/java/com/cholog/bootcamp/service/FrequentlyQuestionChatApiService.java b/src/main/java/com/cholog/bootcamp/service/FrequentlyQuestionChatApiService.java index 85acae4..ce26206 100644 --- a/src/main/java/com/cholog/bootcamp/service/FrequentlyQuestionChatApiService.java +++ b/src/main/java/com/cholog/bootcamp/service/FrequentlyQuestionChatApiService.java @@ -7,9 +7,13 @@ import lombok.extern.slf4j.Slf4j; import org.springframework.ai.chat.client.ChatClient; import org.springframework.ai.chat.prompt.Prompt; +import org.springframework.ai.document.Document; +import org.springframework.ai.vectorstore.SearchRequest; +import org.springframework.ai.vectorstore.VectorStore; import org.springframework.stereotype.Service; import java.math.BigDecimal; +import java.util.stream.Collectors; @Slf4j @Service @@ -18,6 +22,7 @@ public class FrequentlyQuestionChatApiService { private final ChatClient chatClient; private final PricingCalculator pricingCalculator; + private final VectorStore vectorStore; public FrequentlyQuestionChatResponseDto chat(FrequentlyQuestionChatRequestDto requestDto) { var prompt = Prompt.builder() @@ -44,6 +49,48 @@ public FrequentlyQuestionChatResponseDto chat(FrequentlyQuestionChatRequestDto r return new FrequentlyQuestionChatResponseDto(generation.getText(), usage); } + public FrequentlyQuestionChatResponseDto chatWithRag(FrequentlyQuestionChatRequestDto requestDto) { + + String question = requestDto.question(); + + var hits = vectorStore.similaritySearch(SearchRequest.builder() + .query(question) + .topK(8) + .build()); + log.info("hits 결과: {}", hits.stream().map(Document::getId).toList()); + + var context = hits.stream() + .map(d -> "## " + d.getMetadata().get("source") + "\n" + d.getText()) + 
.collect(Collectors.joining("\n\n---\n\n")); + + var response = chatClient.prompt(context) + .user(u -> u.text(""" + 참고 문서: + {context} + + 질문: {question} + """) + .param("context", context) + .param("question", question)) + .call() + .chatResponse(); + + if (response == null) { + return new FrequentlyQuestionChatResponseDto( + "응답이 없습니다.", TokenUsage.EMPTY + ); + } + + var generation = response.getResult().getOutput(); + var metadata = response.getMetadata(); + + var usage = TokenUsage.from(metadata.getUsage()); + var price = calculateModelPrice(metadata.getModel(), usage); + + log.info("[{}] 토큰 사용량: {}, 토큰 비용: {}$\n결과: {}", metadata.getModel(), usage, price, generation.getText()); + return new FrequentlyQuestionChatResponseDto(generation.getText(), usage); + } + private BigDecimal calculateModelPrice(String model, TokenUsage usage) { try { return pricingCalculator.calculatePrice(model, usage); diff --git a/src/main/resources/application.yml b/src/main/resources/application.yml index 6233b35..dcec8fe 100644 --- a/src/main/resources/application.yml +++ b/src/main/resources/application.yml @@ -7,7 +7,7 @@ spring: chat: options: model: gpt-4.1-nano - temperature: 0.1 + temperature: 0.2 embedding: options: model: text-embedding-3-small diff --git a/src/main/resources/prompts/faq-system.st b/src/main/resources/prompts/faq-system.st index 9936619..1dfd417 100644 --- a/src/main/resources/prompts/faq-system.st +++ b/src/main/resources/prompts/faq-system.st @@ -1,26 +1,24 @@ -You are a customer support assistant for an e-commerce platform. -You answer questions based on the knowledge base provided below. +당신은 Cholog Corporation의 FAQ 챗봇입니다. -답변 규칙: -1. 사용자가 한국어로 질문하면 한국어로, 영어로 질문하면 영어로 답하세요. -2. 자료의 우선순위: **Policies > FAQ > Examples**. - - Policies(현행 정책)와 FAQ가 충돌하면 Policies를 따르세요. - - Examples(과거 상담 예시)는 답변 톤/구조 참고용일 뿐, 정책의 근거로 삼지 마세요. -3. 자료에 명시적으로 있는 정보만 사용하세요. 추측하거나 일반 상식으로 보충하지 마세요. -4. 자료에 명확한 답이 있으면 **자신감 있게 답하세요**. 
- 회피 응답("고객지원으로 문의해주세요")은 자료의 FAQ·Policies 모든 섹션을 검토한 뒤에도 관련 정보를 전혀 찾을 수 없을 때만 사용하세요. - 비슷하거나 부분적인 정보가 있으면 "정확한 X는 자료에 없지만, 관련하여 Y는 다음과 같습니다"처럼 부분 정보라도 제공하세요. -5. 답변은 자료에 있는 **모든 관련 조건과 예외를 포함**하여 작성하세요. - - 등급별(standard/Plus/VIP)/케이스별/예외 조건이 있으면 빠짐없이 명시 - - 정책에 명시된 추가 조건(예: 미개봉, 시한, 제외 지역, 영업일 기준)을 함께 답변 - - 길이는 정확성을 위해 필요한 만큼. 불필요한 인사말/사과만 금지. -6. 정책/숫자/기간 같은 구체값은 원문 그대로 인용하세요. 임의로 바꾸지 마세요. +[답변 원칙] +1. 참고 문서에 있는 내용만 근거로 답하세요. 추측 금지. +2. 관련 정보는 빠짐없이 포함하세요: + - 회원 등급별 차이 (Standard / Plus / VIP / 구독자) — 질문이 특정 등급만 다뤄도 다른 등급과의 차이가 있으면 명시 + - 상품 카테고리별 차이 (일반/냉장/마켓플레이스) + - 수치 정보 (금액, 기간, 한도, 영업일, %) + - 조건/예외/제약 +3. 사용자가 잘못된 전제로 질문하면("X 맞나요?", "Y라고 들었는데") 먼저 정정한 뒤 정확한 정책을 안내하세요. +4. 마켓플레이스 관련은 판매자 정책이 우선이며, 그 사실을 항상 명시하세요. -# 1. Public FAQ -{faq} +[혼동 주의 — 자주 틀리는 부분] +- 냉장 배송료는 회원 등급/무료배송과 무관한 고정 요금입니다. VIP·구독자도 냉장 배송료는 부담합니다. +- 등급 기준·환불 기간 같은 수치는 문서 값을 정확히 인용하세요. 임의 추정 금지. -# 2. Current Policies (authoritative) -{policies} +[정보 부족 시] +- 개별 주문 상태/이력 질문(예: "내 배송 언제 와요", "저번에 물어본 거")은 주문번호를 먼저 요청하세요. +- "다른 고객 사례", "보상받은 사람 있나요" 같은 일화성 질문은 사례 공유 불가를 알리고, 문서에 있는 공식 정책/절차로 안내하세요. -# 3. 
Past Conversation Examples (style reference only) -{examples} +[형식] +- 한국어로 답변 +- 핵심 수치는 굵게 또는 명확히 분리 +- 조건이 여럿이면 항목별로 나열 \ No newline at end of file From 405d115d4963fa80a0eb914955620d4ad507ba47 Mon Sep 17 00:00:00 2001 From: youngsu5582 <98307410+youngsu5582@users.noreply.github.com> Date: Thu, 21 May 2026 22:47:25 +0900 Subject: [PATCH 10/17] =?UTF-8?q?feat:=20=ED=8F=89=EA=B0=80=EB=B0=A9?= =?UTF-8?q?=EC=8B=9D=20=EB=B3=80=EA=B2=BD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 도메인 숫자 및 정책에 대해 검증하는 스크립트 추가 - 이를 통한 관련 내용 progress.md 에 작성 --- data/strict_evaluate.py | 243 ++++++++++++++++ data/strict_questions.json | 561 +++++++++++++++++++++++++++++++++++++ progress.md | 70 +++++ 3 files changed, 874 insertions(+) create mode 100644 data/strict_evaluate.py create mode 100644 data/strict_questions.json diff --git a/data/strict_evaluate.py b/data/strict_evaluate.py new file mode 100644 index 0000000..daa8600 --- /dev/null +++ b/data/strict_evaluate.py @@ -0,0 +1,243 @@ +""" +도메인 정책 엄격 평가 스크립트 + +evaluate.py 와 달리 LLM judge 를 쓰지 않습니다. +정답에 반드시 포함되어야 하는 정규식 패턴(must_contain) 과 +절대 등장해서는 안 되는 패턴(must_not_contain) 으로 결정적으로 판정합니다. + +같은 답변에 대해 항상 같은 결과를 내므로 프롬프트/RAG 변경의 효과를 +노이즈 없이 측정할 수 있습니다. 
+ +사전 준비: + python -m venv .venv + .venv/bin/pip install requests + +실행: + .venv/bin/python strict_evaluate.py + .venv/bin/python strict_evaluate.py --verbose + .venv/bin/python strict_evaluate.py --parallel 10 + .venv/bin/python strict_evaluate.py --category cold_chain + .venv/bin/python strict_evaluate.py --category adversarial --verbose +""" + +import json +import re +import argparse +import time +from concurrent.futures import ThreadPoolExecutor, as_completed +from pathlib import Path + +import requests + +DATA_DIR = Path(__file__).parent +SERVER_URL = "http://localhost:11240/api/chat" + + +def ask_server(question: str) -> dict | None: + try: + resp = requests.post(SERVER_URL, json={"question": question}, timeout=60) + if resp.status_code == 200: + return resp.json() + print(f" [ERROR] HTTP {resp.status_code}: {resp.text[:100]}") + return None + except requests.exceptions.ConnectionError: + print(f" [ERROR] 서버에 연결할 수 없습니다: {SERVER_URL}") + return None + except requests.exceptions.Timeout: + print(f" [ERROR] 타임아웃 (60초)") + return None + + +def check_answer(answer: str, q: dict) -> dict: + """정규식 패턴으로 결정적으로 검증합니다. 
+ + must_contain: 모든 패턴이 매칭되어야 정답 + must_not_contain: 하나라도 매칭되면 오답 + """ + failures = [] + + for pattern in q.get("must_contain", []): + if not re.search(pattern, answer, re.IGNORECASE | re.DOTALL): + failures.append(f"누락 must_contain: /{pattern}/") + + for pattern in q.get("must_not_contain", []): + if re.search(pattern, answer, re.IGNORECASE | re.DOTALL): + failures.append(f"금지 패턴 등장 must_not_contain: /{pattern}/") + + return { + "score": 1 if not failures else 0, + "failures": failures, + } + + +def process_question(q: dict) -> dict: + start = time.time() + response = ask_server(q["question"]) + + if response is None: + return { + "qid": q["id"], + "category": q["category"], + "status": "error", + "question": q["question"], + "duration": time.time() - start, + } + + actual = response.get("answer", "") + result = check_answer(actual, q) + + return { + "qid": q["id"], + "category": q["category"], + "status": "ok", + "score": result["score"], + "failures": result["failures"], + "question": q["question"], + "expected": q.get("expected", ""), + "answer": actual, + "reference": q.get("reference", {}), + "duration": time.time() - start, + } + + +def main(): + parser = argparse.ArgumentParser(description="도메인 정책 엄격 평가") + parser.add_argument("--verbose", action="store_true", help="질문별 상세 출력") + parser.add_argument("--limit", type=int, default=0, help="평가할 질문 수 제한") + parser.add_argument("--parallel", type=int, default=1, help="병렬 워커 수") + parser.add_argument("--category", type=str, default="", help="특정 카테고리만 실행") + parser.add_argument("--show-answer", action="store_true", help="실패 시 실제 답변 출력") + args = parser.parse_args() + + questions_path = DATA_DIR / "strict_questions.json" + with open(questions_path) as f: + questions = json.load(f) + + if args.category: + questions = [q for q in questions if q["category"] == args.category] + + if args.limit > 0: + questions = questions[: args.limit] + + print("=== 도메인 정책 엄격 평가 ===") + print(f"서버: {SERVER_URL}") + print(f"질문 수: 
{len(questions)}") + if args.parallel > 1: + print(f"병렬 워커: {args.parallel}") + print() + + # 서버 연결 확인 + test_resp = ask_server("test") + if test_resp is None: + print("서버에 연결할 수 없습니다. 서버가 실행 중인지 확인하세요:") + print(f" ./gradlew bootRun") + return + + cat_results: dict = {} + total_correct = 0 + total_count = 0 + error_count = 0 + durations: list = [] + detail_results: list = [] + start_time = time.time() + + def collect(r: dict, completed_idx: int, total: int): + nonlocal total_correct, total_count, error_count + durations.append(r["duration"]) + cat = r["category"] + cat_results.setdefault(cat, {"correct": 0, "total": 0}) + + if r["status"] == "error": + error_count += 1 + if args.verbose: + print(f"[{r['qid']}] ERROR — 서버 응답 없음") + return + + cat_results[cat]["total"] += 1 + total_count += 1 + if r["score"] == 1: + cat_results[cat]["correct"] += 1 + total_correct += 1 + marker = "✓" + else: + marker = "✗" + + if args.verbose: + print(f"[{r['qid']}] {marker} ({cat:12s}) {r['question'][:50]}") + if r["score"] == 0: + for f in r["failures"]: + print(f" {f}") + ref = r["reference"] + if isinstance(ref, dict): + print(f" 근거: {ref.get('file', '')}:{ref.get('line', '')}") + if ref.get("quote"): + print(f" 원문: {ref['quote'][:100]}") + if args.show_answer: + print(f" 답변: {r['answer'][:200]}") + + detail_results.append(r) + + if not args.verbose and completed_idx % 10 == 0: + print(f" 진행: {completed_idx}/{total}") + + if args.parallel > 1: + with ThreadPoolExecutor(max_workers=args.parallel) as executor: + futures = [executor.submit(process_question, q) for q in questions] + completed = 0 + for fut in as_completed(futures): + r = fut.result() + completed += 1 + collect(r, completed, len(questions)) + else: + for i, q in enumerate(questions): + r = process_question(q) + collect(r, i + 1, len(questions)) + + elapsed = time.time() - start_time + + print() + print("=== 평가 결과 ===") + pct = total_correct / max(total_count, 1) * 100 + print(f"전체: {total_correct}/{total_count} 
({pct:.1f}%)") + print() + print("카테고리별:") + for cat in sorted(cat_results.keys()): + c = cat_results[cat] + cp = c["correct"] / max(c["total"], 1) * 100 + print(f" {cat:14s}: {c['correct']:2d}/{c['total']:2d} ({cp:3.0f}%)") + + if error_count > 0: + print(f"\n 에러: {error_count}건") + + print(f"\n벽시계 시간: {elapsed:.1f}초") + if durations: + print(f"평균 응답: {sum(durations)/len(durations):.1f}초/질문") + + result_file = DATA_DIR / "strict_eval_result.json" + with open(result_file, "w") as f: + json.dump( + { + "total": total_count, + "correct": total_correct, + "error": error_count, + "accuracy": total_correct / max(total_count, 1), + "by_category": cat_results, + "elapsed_seconds": elapsed, + "details": detail_results, + }, + f, + indent=2, + ensure_ascii=False, + ) + print(f"\n결과 저장: {result_file}") + + # 실패 케이스 요약 + failures = [d for d in detail_results if d.get("score") == 0] + if failures and not args.verbose: + print(f"\n실패 케이스 {len(failures)}건 — --verbose 로 상세 확인") + for d in failures[:5]: + print(f" [{d['qid']}] ({d['category']}) {d['question'][:60]}") + + +if __name__ == "__main__": + main() diff --git a/data/strict_questions.json b/data/strict_questions.json new file mode 100644 index 0000000..1ad4c66 --- /dev/null +++ b/data/strict_questions.json @@ -0,0 +1,561 @@ +[ + { + "id": "S001", + "category": "account", + "question": "회원가입 시 받는 웰컴 쿠폰은 얼마인가요?", + "expected": "3,000원", + "must_contain": ["3,?000\\s*원"], + "must_not_contain": [], + "reference": { + "file": "layer1_faq/account.md", + "line": "24-26", + "quote": "All new accounts start at Standard tier and receive a one-time 3,000 won welcome coupon valid for 30 days." + } + }, + { + "id": "S002", + "category": "account", + "question": "웰컴 쿠폰은 며칠 동안 사용할 수 있나요?", + "expected": "30일", + "must_contain": ["30\\s*일"], + "must_not_contain": [], + "reference": { + "file": "layer1_faq/account.md", + "line": "26", + "quote": "3,000 won welcome coupon valid for 30 days." 
+ } + }, + { + "id": "S003", + "category": "account", + "question": "플러스 등급이 되려면 연간 얼마를 써야 하나요?", + "expected": "200,000원 (20만원)", + "must_contain": ["(200,?000\\s*원|20\\s*만\\s*원)"], + "must_not_contain": ["150,?000", "15\\s*만\\s*원"], + "reference": { + "file": "layer2_policies/current/membership-tiers.md", + "line": "18", + "quote": "Plus | ≥ 200,000 won | January 1st each year" + } + }, + { + "id": "S004", + "category": "account", + "question": "VIP 등급이 되려면 연간 얼마를 써야 하나요?", + "expected": "800,000원 (80만원)", + "must_contain": ["(800,?000\\s*원|80\\s*만\\s*원)"], + "must_not_contain": ["600,?000", "60\\s*만\\s*원"], + "reference": { + "file": "layer2_policies/current/membership-tiers.md", + "line": "19", + "quote": "VIP | ≥ 800,000 won | January 1st each year" + } + }, + { + "id": "S005", + "category": "account", + "question": "VIP 전용 고객센터 전화번호가 뭔가요?", + "expected": "1588-0002", + "must_contain": ["1588[-\\s]?0002"], + "must_not_contain": [], + "reference": { + "file": "layer1_faq/support.md", + "line": "11", + "quote": "1588-0000 (general); 1588-0002 (VIP priority line)" + } + }, + { + "id": "S006", + "category": "account", + "question": "일반 고객센터 전화번호가 뭔가요?", + "expected": "1588-0000", + "must_contain": ["1588[-\\s]?0000"], + "must_not_contain": [], + "reference": { + "file": "layer1_faq/support.md", + "line": "11", + "quote": "1588-0000 (general); 1588-0002 (VIP priority line)" + } + }, + { + "id": "S007", + "category": "account", + "question": "회원 등급은 언제 평가되나요?", + "expected": "매년 1월 1일", + "must_contain": ["1\\s*월\\s*1\\s*일|매년\\s*1\\s*월"], + "must_not_contain": [], + "reference": { + "file": "layer2_policies/current/membership-tiers.md", + "line": "42-44", + "quote": "Tier status is reviewed annually on January 1st." 
+ } + }, + { + "id": "S008", + "category": "points", + "question": "1포인트는 얼마의 가치인가요?", + "expected": "1원", + "must_contain": ["1\\s*(점|포인트|point).{0,30}1\\s*원|1\\s*원\\s*=\\s*1\\s*(점|포인트)|1\\s*point\\s*=\\s*1\\s*won"], + "must_not_contain": [], + "reference": { + "file": "layer2_policies/current/point-earning-rules.md", + "line": "37", + "quote": "1 point = 1 won" + } + }, + { + "id": "S009", + "category": "points", + "question": "포인트는 최소 몇 점부터 사용할 수 있나요?", + "expected": "1,000점", + "must_contain": ["1,?000\\s*(점|포인트|points?)"], + "must_not_contain": [], + "reference": { + "file": "layer2_policies/current/point-earning-rules.md", + "line": "36", + "quote": "Minimum redemption: 1,000 points per order" + } + }, + { + "id": "S010", + "category": "points", + "question": "포인트 유효기간은 얼마인가요?", + "expected": "12개월", + "must_contain": ["12\\s*개월|12\\s*months|1\\s*년"], + "must_not_contain": [], + "reference": { + "file": "layer2_policies/current/point-earning-rules.md", + "line": "43", + "quote": "All points expire 12 months from the date earned." 
+ } + }, + { + "id": "S011", + "category": "points", + "question": "Standard 회원의 포인트 적립률은 몇 퍼센트인가요?", + "expected": "1%", + "must_contain": ["Standard.{0,30}1\\s*%|1\\s*%.{0,30}Standard|스탠다드.{0,30}1\\s*%"], + "must_not_contain": [], + "reference": { + "file": "layer2_policies/current/point-earning-rules.md", + "line": "17", + "quote": "Standard | 1% of order subtotal" + } + }, + { + "id": "S012", + "category": "points", + "question": "Plus 회원의 포인트 적립률은 몇 퍼센트인가요?", + "expected": "3%", + "must_contain": ["Plus.{0,30}3\\s*%|3\\s*%.{0,30}Plus|플러스.{0,30}3\\s*%"], + "must_not_contain": [], + "reference": { + "file": "layer2_policies/current/point-earning-rules.md", + "line": "18", + "quote": "Plus | 3% of order subtotal" + } + }, + { + "id": "S013", + "category": "points", + "question": "VIP 회원의 포인트 적립률은 몇 퍼센트인가요?", + "expected": "5%", + "must_contain": ["VIP.{0,30}5\\s*%|5\\s*%.{0,30}VIP"], + "must_not_contain": [], + "reference": { + "file": "layer2_policies/current/point-earning-rules.md", + "line": "19", + "quote": "VIP | 5% of order subtotal" + } + }, + { + "id": "S014", + "category": "points", + "question": "친환경 포장(Green Packaging)을 선택하면 포인트가 얼마나 적립되나요?", + "expected": "200 포인트", + "must_contain": ["200\\s*(점|포인트|points?)"], + "must_not_contain": [], + "reference": { + "file": "layer1_faq/eco_green.md", + "line": "15-17", + "quote": "Each order using Green Packaging earns a 200-point eco bonus credited 7 days after delivery confirmation." 
+ } + }, + { + "id": "S015", + "category": "returns", + "question": "표준 반품 기간은 며칠인가요?", + "expected": "14일", + "must_contain": ["14\\s*(일|days|calendar\\s*days)"], + "must_not_contain": ["(^|[^0-9])7\\s*일\\s*(이내|안에|내에|입니다)", "(^|[^0-9])10\\s*일\\s*(이내|안에|내에|입니다)"], + "reference": { + "file": "layer2_policies/current/return-policy-v3.md", + "line": "16", + "quote": "may be returned within 14 calendar days of delivery" + } + }, + { + "id": "S016", + "category": "returns", + "question": "단순 변심으로 반품할 때 수수료가 얼마인가요?", + "expected": "3,000원", + "must_contain": ["3,?000\\s*원"], + "must_not_contain": ["5,?000\\s*원\\s*(수수료|handling)"], + "reference": { + "file": "layer2_policies/current/return-policy-v3.md", + "line": "36", + "quote": "a handling fee of 3,000 won is deducted from the refund" + } + }, + { + "id": "S017", + "category": "returns", + "question": "신용카드로 결제했을 때 환불은 며칠 걸리나요?", + "expected": "3~5 영업일", + "must_contain": ["3\\s*[~∼\\-–]\\s*5\\s*영업일|3\\s*[~∼\\-–]\\s*5\\s*business"], + "must_not_contain": [], + "reference": { + "file": "layer1_faq/returns.md", + "line": "26", + "quote": "Credit card: 3–5 business days" + } + }, + { + "id": "S018", + "category": "returns", + "question": "잘못된 상품이 배송됐을 때 사과 쿠폰은 얼마 받나요?", + "expected": "5,000원", + "must_contain": ["5,?000\\s*원"], + "must_not_contain": [], + "reference": { + "file": "layer1_faq/returns.md", + "line": "51-52", + "quote": "A 5,000 won apology coupon is also issued automatically." 
+ } + }, + { + "id": "S019", + "category": "returns", + "question": "계좌이체로 결제했을 때 환불은 최대 며칠 걸리나요?", + "expected": "최대 7 영업일", + "must_contain": ["7\\s*영업일|7\\s*business"], + "must_not_contain": [], + "reference": { + "file": "layer1_faq/returns.md", + "line": "28", + "quote": "Bank transfer: up to 7 business days" + } + }, + { + "id": "S020", + "category": "shipping", + "question": "Economy 배송비는 얼마인가요?", + "expected": "2,500원", + "must_contain": ["2,?500\\s*원"], + "must_not_contain": [], + "reference": { + "file": "layer2_policies/current/shipping-standard.md", + "line": "17", + "quote": "Economy | 2,500 won | 2–4 business days" + } + }, + { + "id": "S021", + "category": "shipping", + "question": "Priority 배송비는 얼마인가요?", + "expected": "5,000원", + "must_contain": ["5,?000\\s*원"], + "must_not_contain": [], + "reference": { + "file": "layer2_policies/current/shipping-standard.md", + "line": "18", + "quote": "Priority | 5,000 won | Next business day" + } + }, + { + "id": "S022", + "category": "shipping", + "question": "Same-Day 당일 배송 요금은 얼마인가요?", + "expected": "8,000원", + "must_contain": ["8,?000\\s*원"], + "must_not_contain": [], + "reference": { + "file": "layer2_policies/current/shipping-standard.md", + "line": "19", + "quote": "Same-Day | 8,000 won | Same day" + } + }, + { + "id": "S023", + "category": "shipping", + "question": "Standard 회원이 무료배송을 받으려면 주문 금액이 얼마 이상이어야 하나요?", + "expected": "20,000원 이상", + "must_contain": ["(20,?000\\s*원|2\\s*만\\s*원)"], + "must_not_contain": ["30,?000\\s*원", "3\\s*만\\s*원"], + "reference": { + "file": "layer2_policies/current/shipping-standard.md", + "line": "23", + "quote": "Standard members: Economy shipping free on orders ≥ 20,000 won" + } + }, + { + "id": "S024", + "category": "shipping", + "question": "당일 발송 마감 시간은 언제인가요?", + "expected": "오후 1시 (13시) KST", + "must_contain": ["(오후\\s*1\\s*시|1\\s*PM|13\\s*시|13:00)"], + "must_not_contain": [], + "reference": { + "file": "layer2_policies/current/shipping-standard.md", + "line": "30", + 
"quote": "Orders confirmed before 1 PM KST on business days are dispatched the same afternoon." + } + }, + { + "id": "S025", + "category": "cold_chain", + "question": "냉장 상품 배송료는 얼마인가요?", + "expected": "4,000원 (등급 무관 고정)", + "must_contain": ["4,?000\\s*원"], + "must_not_contain": [], + "reference": { + "file": "layer2_policies/current/shipping-cold-chain.md", + "line": "35-39", + "quote": "Cold-Chain Fee | Any amount | Flat 4,000 won. Cold-chain fee applies regardless of membership tier or free-shipping eligibility." + } + }, + { + "id": "S026", + "category": "cold_chain", + "question": "냉장 배송은 무슨 요일에 가능한가요?", + "expected": "월요일~목요일", + "must_contain": ["(월요일|월\\s*[~∼\\-–]|월부터)", "(목요일|[~∼\\-–]\\s*목|목까지)"], + "must_not_contain": [], + "reference": { + "file": "layer2_policies/current/shipping-cold-chain.md", + "line": "25-31", + "quote": "Cold-chain delivery is available Monday through Thursday only." + } + }, + { + "id": "S027", + "category": "cold_chain", + "question": "냉장 상품은 어떤 택배사로 배송되나요?", + "expected": "CJ Logistics Fresh / CJ대한통운 Fresh", + "must_contain": ["CJ"], + "must_not_contain": [], + "reference": { + "file": "layer2_policies/current/shipping-cold-chain.md", + "line": "21", + "quote": "All cold-chain orders are shipped exclusively via CJ Logistics Fresh." + } + }, + { + "id": "S028", + "category": "subscription", + "question": "구독 회원의 무료배송 조건은 어떻게 되나요?", + "expected": "모든 주문 무료배송 (금액 무관)", + "must_contain": ["(모든\\s*주문|all\\s*orders|금액\\s*(무관|상관없|에?\\s*관계없)|regardless\\s*of\\s*(order\\s*)?amount)"], + "must_not_contain": ["(20,?000\\s*원|2\\s*만\\s*원).{0,15}(이상|넘|초과|부터)"], + "reference": { + "file": "layer2_policies/current/subscription-terms.md", + "line": "23-25", + "quote": "Free shipping on all orders (including non-subscription purchases), regardless of order amount." 
+ } + }, + { + "id": "S029", + "category": "subscription", + "question": "12개월 연속 구독하면 어떤 혜택이 있나요?", + "expected": "30,000원 쿠폰", + "must_contain": ["(30,?000\\s*원|3\\s*만\\s*원)\\s*(쿠폰|coupon)?"], + "must_not_contain": [], + "reference": { + "file": "layer2_policies/current/subscription-terms.md", + "line": "47-48", + "quote": "12 months | 30,000 won coupon" + } + }, + { + "id": "S030", + "category": "subscription", + "question": "6개월 연속 구독하면 어떤 혜택이 있나요?", + "expected": "10,000 포인트", + "must_contain": ["10,?000\\s*(점|포인트|points?)"], + "must_not_contain": [], + "reference": { + "file": "layer2_policies/current/subscription-terms.md", + "line": "47", + "quote": "6 months | 10,000 loyalty points" + } + }, + { + "id": "S031", + "category": "subscription", + "question": "구독 일시정지는 최대 몇 개월까지 가능한가요?", + "expected": "3개월", + "must_contain": ["3\\s*개월|3\\s*months"], + "must_not_contain": [], + "reference": { + "file": "layer2_policies/current/subscription-terms.md", + "line": "39", + "quote": "Pause (up to 3 months): No charge during pause; resume anytime" + } + }, + { + "id": "S032", + "category": "marketplace", + "question": "마켓플레이스 구매자 보호는 얼마까지 보장되나요?", + "expected": "500,000원", + "must_contain": ["(500,?000\\s*원|50\\s*만\\s*원)"], + "must_not_contain": [], + "reference": { + "file": "layer1_faq/marketplace.md", + "line": "9-10", + "quote": "Cholog guarantees buyer protection on all Marketplace transactions up to 500,000 won." + } + }, + { + "id": "S033", + "category": "marketplace", + "question": "마켓플레이스 셀러의 반품 기간 범위는 얼마인가요?", + "expected": "3일 ~ 30일", + "must_contain": ["3\\s*(일|days?)", "30\\s*(일|days?)"], + "must_not_contain": [], + "reference": { + "file": "layer1_faq/marketplace.md", + "line": "15-16", + "quote": "Return windows for Marketplace sellers range from 3 to 30 days depending on the seller." 
+ } + }, + { + "id": "S034", + "category": "marketplace", + "question": "셀러와 분쟁이 있을 때 며칠 안에 해결 안 되면 Cholog에 에스컬레이션 할 수 있나요?", + "expected": "3 영업일", + "must_contain": ["3\\s*영업일|3\\s*business\\s*days?"], + "must_not_contain": [], + "reference": { + "file": "layer1_faq/marketplace.md", + "line": "30-31", + "quote": "If unresolved within 3 business days, you can escalate to Cholog Buyer Protection" + } + }, + { + "id": "S035", + "category": "complaint", + "question": "배송이 3 영업일 넘게 지연됐을 때 보상은 얼마인가요?", + "expected": "2,000원 쿠폰", + "must_contain": ["2,?000\\s*원"], + "must_not_contain": [], + "reference": { + "file": "layer2_policies/current/complaint-handling.md", + "line": "37", + "quote": "Delivery delay > 3 business days (non-island) | 2,000 won coupon" + } + }, + { + "id": "S036", + "category": "complaint", + "question": "위조 상품으로 확인되면 보상이 어떻게 되나요?", + "expected": "10,000원 쿠폰 + 환불", + "must_contain": ["10,?000\\s*원"], + "must_not_contain": [], + "reference": { + "file": "layer2_policies/current/complaint-handling.md", + "line": "39", + "quote": "Confirmed counterfeit product | Full refund + 10,000 won coupon" + } + }, + { + "id": "S037", + "category": "payment", + "question": "무이자 할부는 최소 얼마부터 가능한가요?", + "expected": "30,000원 이상", + "must_contain": ["(30,?000\\s*원|3\\s*만\\s*원)"], + "must_not_contain": [], + "reference": { + "file": "layer1_faq/payment.md", + "line": "22", + "quote": "Orders totaling 30,000 won or more are eligible for 2- or 3-month interest-free installments" + } + }, + { + "id": "S038", + "category": "payment", + "question": "6개월 무이자 할부는 얼마 이상 주문해야 하나요?", + "expected": "100,000원 이상", + "must_contain": ["(100,?000\\s*원|10\\s*만\\s*원)"], + "must_not_contain": [], + "reference": { + "file": "layer1_faq/payment.md", + "line": "23", + "quote": "Orders over 100,000 won unlock a 6-month interest-free plan." 
+ } + }, + { + "id": "S039", + "category": "payment", + "question": "Cholog Pay에 충전하면 보너스가 몇 퍼센트인가요?", + "expected": "2%", + "must_contain": ["2\\s*%"], + "must_not_contain": [], + "reference": { + "file": "layer1_faq/payment.md", + "line": "41-43", + "quote": "Load funds via bank transfer and earn a 2% top-up bonus" + } + }, + { + "id": "S040", + "category": "adversarial", + "question": "반품 기간이 7일이라고 들었는데 맞나요?", + "expected": "아니요, 14일이 현재 정책입니다", + "must_contain": ["14\\s*일", "(아니|틀린|변경|14일이|14일로|14일\\s*입니다|14일이\\s*맞)"], + "must_not_contain": [], + "reference": { + "file": "layer2_policies/current/return-policy-v3.md vs deprecated/return-policy-v1.md", + "line": "16", + "quote": "v1: 7 calendar days (deprecated 2023-03-01). Current v3: 14 calendar days." + } + }, + { + "id": "S041", + "category": "adversarial", + "question": "Plus 등급 기준이 연 15만원 맞나요?", + "expected": "아니요, 현재는 200,000원입니다", + "must_contain": ["(200,?000\\s*원|20\\s*만\\s*원)", "(아니|틀린|변경|현재|아닙니다|20만원이\\s*맞|200,000원이\\s*맞)"], + "must_not_contain": [], + "reference": { + "file": "layer2_policies/current/membership-tiers.md vs deprecated/membership-tiers-final-v2.md", + "line": "18", + "quote": "v2 (2023): Plus ≥ 150,000 won. v3 (2024 current): Plus ≥ 200,000 won." + } + }, + { + "id": "S042", + "category": "adversarial", + "question": "무료배송 기준이 3만원 맞나요?", + "expected": "아니요, 현재는 20,000원입니다 (Standard 기준)", + "must_contain": ["(20,?000\\s*원|2\\s*만\\s*원)", "(아니|틀린|변경|현재|아닙니다|20,000원이\\s*맞|2만원이\\s*맞)"], + "must_not_contain": [], + "reference": { + "file": "layer2_policies/current/shipping-standard.md vs deprecated/shipping-standard-2023.md", + "line": "17, 23", + "quote": "2023: ≥ 30,000 won. 2024 current: ≥ 20,000 won." 
+ } + }, + { + "id": "S043", + "category": "adversarial", + "question": "VIP인데 냉장 상품 배송비도 무료인가요?", + "expected": "아니요, 냉장 배송료 4,000원은 등급 무관 고정입니다", + "must_contain": ["4,?000\\s*원"], + "must_not_contain": ["VIP.{0,30}냉장.{0,30}무료", "냉장.{0,20}무료.{0,20}배송"], + "reference": { + "file": "layer2_policies/current/shipping-cold-chain.md", + "line": "38-39", + "quote": "Cold-chain fee applies regardless of membership tier or free-shipping eligibility." + } + } +] diff --git a/progress.md b/progress.md index 178f426..3981f61 100644 --- a/progress.md +++ b/progress.md @@ -103,3 +103,73 @@ Easy 는 한계 도달 chunk size 400 + temperature 0.2 로 설정 아직 RAG 의 도입의 효과를 잘못 느끼겠다.. + +### 평가방식 변경 + +기존 평가방식은, LLM 에게 돌린 결과를 LLM 에게 다시 평가받는 방식이였다. +이 과정에서, 평가의 의미가 희석된다고 생각했다. + +- `1,000 포인트에 대한 정보를 포함하지 않아 핵심 사실` 와 같이 핵심 사실이라는 모호한 평가 +- `실제 답변은 탈퇴 방법에 대한 구체적인 지침` 와 같이 완벽한 튜닝이 아니면 어려운 응답과 평가 + +=> 이런 점들을 기반으로, 정책적인 요소는 틀리지 않는 챗봇을 만들기로 결정했다. + +내가 생각하는 고객지원 챗봇은, 상담원이 받을 때 곤란하지 않은 응답을 제공해야 한다고 생각했다. +그러기 위해선 + +- 모호하거나, 모르면 차라리 모른다고 대답한다. +- 절대, 거짓말을 하지 않는다. (필요한 포인트가 5,000 인데 3,000 이라고 응답) + +을 목적으로 했다. +그래서, 새로운 평가 스크립트를 만들었다. + +LLM 평가가 아닌, 정규식 패턴을 기반으로 반드시 포함되어야 하는 요소를 검증하는 식이다. + +``` +"question": "웰컴 쿠폰은 며칠 동안 사용할 수 있나요?", +"expected": "30일", +"must_contain": ["30\\s*일"], +``` + +웰컴 쿠폰은 30일 이내 사용 가능하다는 비즈니스적 요소가 포함되어 있는지 확인한다. 
+ +``` +=== 평가 결과 === +전체: 38/43 (88.4%) + +카테고리별: + account : 7/ 7 (100%) + adversarial : 3/ 4 ( 75%) + cold_chain : 3/ 3 (100%) + complaint : 1/ 2 ( 50%) + marketplace : 3/ 3 (100%) + payment : 1/ 3 ( 33%) + points : 7/ 7 (100%) + returns : 5/ 5 (100%) + shipping : 5/ 5 (100%) + subscription : 3/ 4 ( 75%) +``` + +RAG 를 쓴 버전 + +``` +=== 평가 결과 === +전체: 38/43 (88.4%) + +카테고리별: + account : 6/ 7 ( 86%) + adversarial : 2/ 4 ( 50%) + cold_chain : 3/ 3 (100%) + complaint : 2/ 2 (100%) + marketplace : 3/ 3 (100%) + payment : 3/ 3 (100%) + points : 7/ 7 (100%) + returns : 4/ 5 ( 80%) + shipping : 5/ 5 (100%) + subscription : 3/ 4 ( 75%) +``` + +RAG 사용하지 않고, 그냥 다 집어넣은 버전 +두개 큰 차이가 없는거 같다. 쩝... + +=> 2주내 POC 를 내야하므로, 이정도로 마무리한다. \ No newline at end of file From 933e63869f1c4c7c88113a932de9e4e492768945 Mon Sep 17 00:00:00 2001 From: youngsu5582 <98307410+youngsu5582@users.noreply.github.com> Date: Thu, 21 May 2026 22:59:14 +0900 Subject: [PATCH 11/17] =?UTF-8?q?docs:=20wall-report=20=EC=9E=91=EC=84=B1?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mission/wall-report.md | 36 +++++++++++++++++++++++++++--------- 1 file changed, 27 insertions(+), 9 deletions(-) diff --git a/mission/wall-report.md b/mission/wall-report.md index 3489fe3..f444339 100644 --- a/mission/wall-report.md +++ b/mission/wall-report.md @@ -7,36 +7,54 @@ > 구현하면서 잘 안 됐던 것, 예상과 달랐던 것을 적어주세요. -- +사실, 정답이 정해져있을거라 생각했다. +시작하기 전, 모델을 바꾸거나 RAG 를 도입하면 일정 수준의 기대치에 도달 할거라 생각했다. + +하지만, 쉽사리 내가 생각한 기대치 80% 에 도달하지 못했다. +프롬프트를 깎거나, RAG 를 도입해도 크게 달라지지 않았다. (오히려 더 떨어졌다.) + +그래서, 솔직히 의욕이 꺾였다. 그러다가, 평가의 기준을 스스로 맞다고 생각한 방향으로 바꿨다. +시간이 없어서, 더 개선은 못하지만 나름대로의 결론을 내린거 같다. + +> progress.md 에 진행의 기록을 남겨놓았습니다. ## 2. 해결하지 못한 것 > 시도했지만 결국 해결 못한 문제가 있다면 적어주세요. -- +그래도, 기존에 제시했던 evaluate 의 %를 올리지 못한것? ## 3. 정확도 측정 결과 > 테스트 질문 150개로 측정한 정확도를 기록해주세요. 
-| 난이도 | 정확도 | 비고 | -|--------|--------|------| -| easy | | | -| medium | | | -| hard | | | +가장 마지막 방식(RAG) + +전체: 70/150 (46.7%) + +| 난이도 | 정확도 | 비고 | +|--------|-------|------| +| easy | 17/30 | | +| medium | 9/26 | | +| hard | 44/94 | | ## 4. 왜 그런 결과가 나왔는지 > 정확도가 낮은 난이도의 질문을 몇 개 살펴보고, 왜 틀렸는지 분석해주세요. -- +- 매번 너무 달라진거 같아서 못 정하겠다. + +LLM 은 결국 비결정적인데, `비결정적인 결과 x 비결정적인 평가` 사이에서 뭘 해야하는지 명확하게 모르겠다. ## 5. 개선하고 싶은 것 > 시간이 더 있었다면 시도해보고 싶은 개선점을 적어주세요. -- +- 테스트를 좀 더 체계적으로 할 거 같다. + +코드 주석치면서 테스트 하거나, 테스트 기록을 명확하게 하지 않았다. +날짜 or 프롬프트나 & rag 세팅을 버저닝 관리가 잘 되게 할 거 같다. \ No newline at end of file From 1312737b6655610dc2899ac6b4032fcb7a1cf88b Mon Sep 17 00:00:00 2001 From: youngsu5582 <98307410+youngsu5582@users.noreply.github.com> Date: Tue, 26 May 2026 21:52:45 +0900 Subject: [PATCH 12/17] =?UTF-8?q?refactor:=20=EB=B6=88=ED=95=84=EC=9A=94?= =?UTF-8?q?=ED=95=9C=20=EC=BD=94=EB=93=9C=20=EC=A0=9C=EA=B1=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../bootcamp/config/ChatClientConfig.java | 27 +------------------ .../FrequentlyQuestionChatApiService.java | 25 ----------------- 2 files changed, 1 insertion(+), 51 deletions(-) diff --git a/src/main/java/com/cholog/bootcamp/config/ChatClientConfig.java b/src/main/java/com/cholog/bootcamp/config/ChatClientConfig.java index 3a65af5..db438c6 100644 --- a/src/main/java/com/cholog/bootcamp/config/ChatClientConfig.java +++ b/src/main/java/com/cholog/bootcamp/config/ChatClientConfig.java @@ -35,15 +35,9 @@ public class ChatClientConfig { @Bean public ChatClient chatClient(ChatClient.Builder builder, - @Value("classpath:prompts/faq-system.st") Resource systemTemplateResource, - @Value("classpath:layer1_faq/*.md") Resource[] faqResources, - @Value("classpath:layer2_policies/current/*.md") Resource[] policyResources, - @Value("classpath:layer3_examples/*.md") Resource[] exampleResources) { + @Value("classpath:prompts/faq-system.st") Resource systemTemplateResource) { var systemTemplate = 
loadSystemTemplate(systemTemplateResource); - var faq = concatResources(faqResources, "faq"); - var policies = concatResources(policyResources, "policies"); - var examples = concatResources(exampleResources, "examples"); return builder .defaultSystem(spec -> spec @@ -61,23 +55,4 @@ private String loadSystemTemplate(Resource systemTemplate) { return ""; } } - - private String concatResources(Resource[] resources, String label) { - try { - log.info("{} layer 파일을 로드합니다. 개수: {}", label, resources.length); - - var sb = new StringBuilder(); - for (Resource r : resources) { - sb.append("## ").append(r.getFilename()).append(System.lineSeparator()); - sb.append(r.getContentAsString(StandardCharsets.UTF_8)); - sb.append(System.lineSeparator()).append(System.lineSeparator()); - sb.append("---"); - sb.append(System.lineSeparator()).append(System.lineSeparator()); - } - return sb.toString(); - } catch (Exception e) { - log.warn("{} layer 로드중 문제가 발생했습니다.", label, e); - return ""; - } - } } diff --git a/src/main/java/com/cholog/bootcamp/service/FrequentlyQuestionChatApiService.java b/src/main/java/com/cholog/bootcamp/service/FrequentlyQuestionChatApiService.java index ce26206..adfaaea 100644 --- a/src/main/java/com/cholog/bootcamp/service/FrequentlyQuestionChatApiService.java +++ b/src/main/java/com/cholog/bootcamp/service/FrequentlyQuestionChatApiService.java @@ -24,31 +24,6 @@ public class FrequentlyQuestionChatApiService { private final PricingCalculator pricingCalculator; private final VectorStore vectorStore; - public FrequentlyQuestionChatResponseDto chat(FrequentlyQuestionChatRequestDto requestDto) { - var prompt = Prompt.builder() - .content(requestDto.question()) - .build(); - - var response = chatClient.prompt(prompt) - .call() - .chatResponse(); - - if (response == null) { - return new FrequentlyQuestionChatResponseDto( - "응답이 없습니다.", TokenUsage.EMPTY - ); - } - - var generation = response.getResult().getOutput(); - var metadata = response.getMetadata(); - - var usage 
= TokenUsage.from(metadata.getUsage()); - var price = calculateModelPrice(metadata.getModel(), usage); - - log.info("[{}] 토큰 사용량: {}, 토큰 비용: {}$\n결과: {}", metadata.getModel(), usage, price, generation.getText()); - return new FrequentlyQuestionChatResponseDto(generation.getText(), usage); - } - public FrequentlyQuestionChatResponseDto chatWithRag(FrequentlyQuestionChatRequestDto requestDto) { String question = requestDto.question(); From 12c4eddf2fe2ed7d01929b72db475bf5cf0fdf7b Mon Sep 17 00:00:00 2001 From: youngsu5582 <98307410+youngsu5582@users.noreply.github.com> Date: Tue, 26 May 2026 22:31:33 +0900 Subject: [PATCH 13/17] =?UTF-8?q?feat:=20=EB=A7=88=ED=81=AC=EB=8B=A4?= =?UTF-8?q?=EC=9A=B4=20=EA=B8=B0=EB=B0=98=20=ED=8C=8C=EC=8B=B1=EC=9C=BC?= =?UTF-8?q?=EB=A1=9C=20=EB=B3=80=EA=B2=BD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - markdown document reader 추가 --- build.gradle | 2 + .../bootcamp/config/VectorStoreConfig.java | 65 +++++++++---------- 2 files changed, 34 insertions(+), 33 deletions(-) diff --git a/build.gradle b/build.gradle index 8a405d3..d131962 100644 --- a/build.gradle +++ b/build.gradle @@ -27,6 +27,8 @@ dependencies { implementation 'org.springframework.boot:spring-boot-starter-web' implementation 'org.springframework.ai:spring-ai-starter-model-openai' implementation 'org.springframework.ai:spring-ai-rag' + implementation 'org.springframework.ai:spring-ai-markdown-document-reader' + compileOnly 'org.projectlombok:lombok' testImplementation 'org.springframework.boot:spring-boot-starter-test' diff --git a/src/main/java/com/cholog/bootcamp/config/VectorStoreConfig.java b/src/main/java/com/cholog/bootcamp/config/VectorStoreConfig.java index bc74a99..87467d5 100644 --- a/src/main/java/com/cholog/bootcamp/config/VectorStoreConfig.java +++ b/src/main/java/com/cholog/bootcamp/config/VectorStoreConfig.java @@ -3,7 +3,8 @@ import lombok.extern.slf4j.Slf4j; import org.springframework.ai.document.Document; 
import org.springframework.ai.embedding.EmbeddingModel; -import org.springframework.ai.transformer.splitter.TokenTextSplitter; +import org.springframework.ai.reader.markdown.MarkdownDocumentReader; +import org.springframework.ai.reader.markdown.config.MarkdownDocumentReaderConfig; import org.springframework.ai.vectorstore.SimpleVectorStore; import org.springframework.ai.vectorstore.VectorStore; import org.springframework.beans.factory.annotation.Value; @@ -11,10 +12,7 @@ import org.springframework.context.annotation.Configuration; import org.springframework.core.io.Resource; -import java.io.IOException; -import java.nio.charset.StandardCharsets; import java.util.ArrayList; -import java.util.Arrays; import java.util.List; import java.util.Map; @@ -30,41 +28,42 @@ public VectorStore vectorStore(EmbeddingModel model, var store = SimpleVectorStore.builder(model).build(); var documents = new ArrayList(); - documents.addAll(toDocuments(faqResources, "faq")); - documents.addAll(toDocuments(policyResources, "policy")); - documents.addAll(toDocuments(exampleResources, "example")); + documents.addAll(toMarkdownDocuments(faqResources, "faq")); + documents.addAll(toMarkdownDocuments(policyResources, "policy")); + documents.addAll(toMarkdownDocuments(exampleResources, "example")); - - TokenTextSplitter splitter = TokenTextSplitter.builder() - .withChunkSize(400) - .withMinChunkSizeChars(200) - .withMaxNumChunks(10000) - .withKeepSeparator(true) - .build(); - var chunks = splitter.apply(documents); - chunks.forEach(chunk -> { - if (chunk.getText() != null) { - log.info("chunk ID: {}, TEXT: {}", chunk.getId(), - chunk.getText().substring(0, Math.min(80, chunk.getText().length())).replace("\n", " ")); + documents.forEach(document -> { + if (document.getText() != null) { + log.info("document ID: {}, file: {}, metadata: {}, TEXT: {}", + document.getId(), + document.getMetadata().get("source"), + document.getMetadata(), + document.getText().substring(0, Math.min(80, 
document.getText().length())).replace("\n", " ")); } }); - store.add(chunks); - log.info("vector store 적재 완료. 원본 {} -> 청크 {}", documents.size(), chunks.size()); + store.add(documents); + log.info("vector store 적재 완료. markdown 문서 {}", documents.size()); return store; } - private List toDocuments(Resource[] resources, String layer) { - return Arrays.stream(resources) - .map(r -> { - try { - return new Document( - r.getContentAsString(StandardCharsets.UTF_8), - Map.of("source", r.getFilename(), "layer", layer)); - } catch (IOException e) { - throw new IllegalStateException("리소스 로드 실패: " + r.getFilename(), e); - } - }) - .toList(); + private List toMarkdownDocuments(Resource[] resources, String layer) { + var documents = new ArrayList(); + + for (Resource resource : resources) { + var config = MarkdownDocumentReaderConfig.builder() + .withIncludeCodeBlock(false) + .withIncludeBlockquote(false) + .withHorizontalRuleCreateDocument(true) + .withAdditionalMetadata(Map.of( + "source", resource.getFilename(), + "layer", layer + )) + .build(); + + documents.addAll(new MarkdownDocumentReader(resource, config).get()); + } + + return documents; } } From 18f592136e7fa5e552f51c65ed2da49eb695fd80 Mon Sep 17 00:00:00 2001 From: youngsu5582 <98307410+youngsu5582@users.noreply.github.com> Date: Tue, 26 May 2026 22:53:22 +0900 Subject: [PATCH 14/17] =?UTF-8?q?feat:=20prompt=20request=20=EB=A1=9C?= =?UTF-8?q?=EC=A7=81=EC=97=90=20try-catch=20=EA=B5=AC=EB=AC=B8=20=EC=B6=94?= =?UTF-8?q?=EA=B0=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 당장은, Exception catch 를 해서 어떤 예외가 발생하는지 확인 후 고도화 --- .../FrequentlyQuestionChatApiService.java | 41 +++++++++++-------- 1 file changed, 24 insertions(+), 17 deletions(-) diff --git a/src/main/java/com/cholog/bootcamp/service/FrequentlyQuestionChatApiService.java b/src/main/java/com/cholog/bootcamp/service/FrequentlyQuestionChatApiService.java index adfaaea..197c7bf 100644 --- 
a/src/main/java/com/cholog/bootcamp/service/FrequentlyQuestionChatApiService.java +++ b/src/main/java/com/cholog/bootcamp/service/FrequentlyQuestionChatApiService.java @@ -6,6 +6,7 @@ import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; import org.springframework.ai.chat.client.ChatClient; +import org.springframework.ai.chat.model.ChatResponse; import org.springframework.ai.chat.prompt.Prompt; import org.springframework.ai.document.Document; import org.springframework.ai.vectorstore.SearchRequest; @@ -13,6 +14,7 @@ import org.springframework.stereotype.Service; import java.math.BigDecimal; +import java.util.List; import java.util.stream.Collectors; @Slf4j @@ -33,12 +35,32 @@ public FrequentlyQuestionChatResponseDto chatWithRag(FrequentlyQuestionChatReque .topK(8) .build()); log.info("hits 결과: {}", hits.stream().map(Document::getId).toList()); + try { + var response = execute(question, hits); + + var generation = response.getResult().getOutput(); + var metadata = response.getMetadata(); + + var usage = TokenUsage.from(metadata.getUsage()); + var price = calculateModelPrice(metadata.getModel(), usage); + + log.info("[{}] 토큰 사용량: {}, 토큰 비용: {}$\n결과: {}", metadata.getModel(), usage, price, generation.getText()); + return new FrequentlyQuestionChatResponseDto(generation.getText(), usage); + } catch (Exception e) { + log.warn("챗봇 응답 실패: {}", e.getMessage(), e); + return new FrequentlyQuestionChatResponseDto( + "챗봇 응답 생성 중 오류 발생했습니다.", + TokenUsage.EMPTY + ); + } + } - var context = hits.stream() + private ChatResponse execute(String question, List documents) { + var context = documents.stream() .map(d -> "## " + d.getMetadata().get("source") + "\n" + d.getText()) .collect(Collectors.joining("\n\n---\n\n")); - var response = chatClient.prompt(context) + return chatClient.prompt(question) .user(u -> u.text(""" 참고 문서: {context} @@ -49,21 +71,6 @@ public FrequentlyQuestionChatResponseDto chatWithRag(FrequentlyQuestionChatReque .param("question", 
question)) .call() .chatResponse(); - - if (response == null) { - return new FrequentlyQuestionChatResponseDto( - "응답이 없습니다.", TokenUsage.EMPTY - ); - } - - var generation = response.getResult().getOutput(); - var metadata = response.getMetadata(); - - var usage = TokenUsage.from(metadata.getUsage()); - var price = calculateModelPrice(metadata.getModel(), usage); - - log.info("[{}] 토큰 사용량: {}, 토큰 비용: {}$\n결과: {}", metadata.getModel(), usage, price, generation.getText()); - return new FrequentlyQuestionChatResponseDto(generation.getText(), usage); } private BigDecimal calculateModelPrice(String model, TokenUsage usage) { From 0e22dcdd67c329e587e15702b9847c79fdc2622f Mon Sep 17 00:00:00 2001 From: youngsu5582 <98307410+youngsu5582@users.noreply.github.com> Date: Tue, 26 May 2026 22:55:32 +0900 Subject: [PATCH 15/17] =?UTF-8?q?feat:=20faq=20=EC=8B=9C=EC=8A=A4=ED=85=9C?= =?UTF-8?q?=20=ED=94=84=EB=A1=AC=ED=94=84=ED=8A=B8=20=EA=B2=BD=EB=A1=9C=20?= =?UTF-8?q?application=20yml=20=EB=A1=9C=20=EB=B6=84=EB=A6=AC?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../java/com/cholog/bootcamp/config/ChatClientConfig.java | 4 ++-- src/main/resources/application.yml | 4 ++++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/src/main/java/com/cholog/bootcamp/config/ChatClientConfig.java b/src/main/java/com/cholog/bootcamp/config/ChatClientConfig.java index db438c6..ebf940d 100644 --- a/src/main/java/com/cholog/bootcamp/config/ChatClientConfig.java +++ b/src/main/java/com/cholog/bootcamp/config/ChatClientConfig.java @@ -35,7 +35,7 @@ public class ChatClientConfig { @Bean public ChatClient chatClient(ChatClient.Builder builder, - @Value("classpath:prompts/faq-system.st") Resource systemTemplateResource) { + @Value("${app.chat.system-prompt}") Resource systemTemplateResource) { var systemTemplate = loadSystemTemplate(systemTemplateResource); @@ -48,7 +48,7 @@ public ChatClient chatClient(ChatClient.Builder builder, private 
String loadSystemTemplate(Resource systemTemplate) { try { String content = systemTemplate.getContentAsString(StandardCharsets.UTF_8); - log.info("faq 시스템 프롬프트를 로드합니다. 길이: {}", content.length()); + log.info("faq 시스템 프롬프트를 로드합니다. 경로: {}, 길이: {}", systemTemplate.getURI(), content.length()); return content; } catch (Exception e) { log.warn("faq 시스템 프롬프트 로드중 문제가 발생했습니다.", e); diff --git a/src/main/resources/application.yml b/src/main/resources/application.yml index dcec8fe..950ca16 100644 --- a/src/main/resources/application.yml +++ b/src/main/resources/application.yml @@ -11,3 +11,7 @@ spring: embedding: options: model: text-embedding-3-small + +app: + chat: + system-prompt: classpath:prompts/faq-system.st From 9bd53e9c1559ec322cf05ba0913e4112c5baedfe Mon Sep 17 00:00:00 2001 From: youngsu5582 <98307410+youngsu5582@users.noreply.github.com> Date: Tue, 26 May 2026 23:23:54 +0900 Subject: [PATCH 16/17] =?UTF-8?q?feat:=20layer=20=EB=B3=84=20=EC=B5=9C?= =?UTF-8?q?=EC=86=8C=20=EA=B2=80=EC=83=89=20=EA=B0=9C=EC=88=98=20=EC=A7=80?= =?UTF-8?q?=EC=A0=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - FilterExpression 통해서 layer 필터링 및 검색 --- .../FrequentlyQuestionChatApiService.java | 45 ++++++++++++++++--- 1 file changed, 38 insertions(+), 7 deletions(-) diff --git a/src/main/java/com/cholog/bootcamp/service/FrequentlyQuestionChatApiService.java b/src/main/java/com/cholog/bootcamp/service/FrequentlyQuestionChatApiService.java index 197c7bf..f27a00d 100644 --- a/src/main/java/com/cholog/bootcamp/service/FrequentlyQuestionChatApiService.java +++ b/src/main/java/com/cholog/bootcamp/service/FrequentlyQuestionChatApiService.java @@ -7,13 +7,14 @@ import lombok.extern.slf4j.Slf4j; import org.springframework.ai.chat.client.ChatClient; import org.springframework.ai.chat.model.ChatResponse; -import org.springframework.ai.chat.prompt.Prompt; import org.springframework.ai.document.Document; import 
org.springframework.ai.vectorstore.SearchRequest; import org.springframework.ai.vectorstore.VectorStore; +import org.springframework.ai.vectorstore.filter.FilterExpressionBuilder; import org.springframework.stereotype.Service; import java.math.BigDecimal; +import java.util.ArrayList; import java.util.List; import java.util.stream.Collectors; @@ -30,11 +31,7 @@ public FrequentlyQuestionChatResponseDto chatWithRag(FrequentlyQuestionChatReque String question = requestDto.question(); - var hits = vectorStore.similaritySearch(SearchRequest.builder() - .query(question) - .topK(8) - .build()); - log.info("hits 결과: {}", hits.stream().map(Document::getId).toList()); + var hits = searchContext(question); try { var response = execute(question, hits); @@ -55,12 +52,46 @@ public FrequentlyQuestionChatResponseDto chatWithRag(FrequentlyQuestionChatReque } } + private List searchContext(String question) { + var documents = new ArrayList(); + documents.addAll(searchByLayer(question, 3, "policy")); + documents.addAll(searchByLayer(question, 4, "faq")); + documents.addAll(searchByLayer(question, 1, "example")); + + log.info("hits 결과: {}", + documents.stream() + .map(document -> "%s(title=%s, layer=%s, source=%s)".formatted( + document.getId(), + document.getMetadata().get("title"), + document.getMetadata().get("layer"), + document.getMetadata().get("source") + )) + .toList()); + + log.info("layer 별 hits 결과: {}", + documents.stream().collect(Collectors.groupingBy( + document -> document.getMetadata().get("layer"), + Collectors.counting() + ))); + return documents; + } + + private List searchByLayer(String question, int topK, String layer) { + var filter = new FilterExpressionBuilder(); + + return vectorStore.similaritySearch(SearchRequest.builder() + .query(question) + .topK(topK) + .filterExpression(filter.eq("layer", layer).build()) + .build()); + } + private ChatResponse execute(String question, List documents) { var context = documents.stream() .map(d -> "## " + 
d.getMetadata().get("source") + "\n" + d.getText()) .collect(Collectors.joining("\n\n---\n\n")); - return chatClient.prompt(question) + return chatClient.prompt() .user(u -> u.text(""" 참고 문서: {context} From 8eecabd7c77867b085a8bfdff14f577899b9c655 Mon Sep 17 00:00:00 2001 From: youngsu5582 <98307410+youngsu5582@users.noreply.github.com> Date: Tue, 26 May 2026 23:55:33 +0900 Subject: [PATCH 17/17] =?UTF-8?q?feat:=20spring=20ai=20property=20?= =?UTF-8?q?=EA=B8=B0=EB=B0=98=20=EC=9E=AC=EC=8B=9C=EB=8F=84=20=EB=A1=9C?= =?UTF-8?q?=EC=A7=81=20=EC=B6=94=EA=B0=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/main/resources/application.yml | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/main/resources/application.yml b/src/main/resources/application.yml index 950ca16..cf03fc7 100644 --- a/src/main/resources/application.yml +++ b/src/main/resources/application.yml @@ -2,6 +2,20 @@ spring: application: name: spring-ai-bootcamp-basic ai: + retry: + max-attempts: 3 # 최대 시도(최초 호출 포함) + backoff: + initial-interval: 500ms # 첫 대기 시간 + multiplier: 2 # 요청당 시간을 몇배로 늘릴지 + max-interval: 3s + exclude-on-http-codes: # 재시도 제외 목록 + - 408 + - 429 + - 500 + - 502 + - 503 + - 504 + on-client-errors: false # 4xx 에러 전체를 재시도로 볼지 openai: api-key: ${OPENAI_API_KEY} chat: