From a96cff65d2d00ffaf22aa24e5ce12f1177f135bc Mon Sep 17 00:00:00 2001 From: yuhongxiao Date: Fri, 1 Aug 2025 11:39:07 +0800 Subject: [PATCH 1/4] =?UTF-8?q?tts=E6=8E=A5=E5=8F=A3=E6=94=AF=E6=8C=81?= =?UTF-8?q?=E8=BF=94=E5=9B=9Emp3=E6=96=87=E4=BB=B6=E7=BC=96=E7=A0=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tests/integration_tests/test_audio.py | 3 ++- zhipuai/api_resource/audio/audio.py | 4 +++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/tests/integration_tests/test_audio.py b/tests/integration_tests/test_audio.py index 2ef05d6..aa9bb4c 100644 --- a/tests/integration_tests/test_audio.py +++ b/tests/integration_tests/test_audio.py @@ -38,7 +38,8 @@ def test_audio_speech_streaming(logging_conf): input='你好呀,欢迎来到智谱开放平台', voice='tongtong', stream=True, - response_format='wav', + response_format='mp3', + encode_format='hex' ) with open("output.pcm", "wb") as f: for item in response: diff --git a/zhipuai/api_resource/audio/audio.py b/zhipuai/api_resource/audio/audio.py index d916048..4181b2b 100644 --- a/zhipuai/api_resource/audio/audio.py +++ b/zhipuai/api_resource/audio/audio.py @@ -51,6 +51,7 @@ def speech( extra_headers: Headers | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + encode_format: str | NotGiven = NOT_GIVEN, ) -> _legacy_response.HttpxBinaryResponseContent | StreamResponse[AudioSpeechChunk]: body = deepcopy_minimal( { @@ -61,7 +62,8 @@ def speech( "response_format": response_format, "sensitive_word_check": sensitive_word_check, "request_id": request_id, - "user_id": user_id + "user_id": user_id, + "encode_format": encode_format } ) return self._post( From 4a3831d7058dfd5112c54ee3cf05ff7234f1f582 Mon Sep 17 00:00:00 2001 From: yuhongxiao Date: Fri, 1 Aug 2025 16:27:17 +0800 Subject: [PATCH 2/4] =?UTF-8?q?=E8=AE=BE=E7=BD=AEtts=E7=BC=96=E7=A0=81?= =?UTF-8?q?=E9=BB=98=E8=AE=A4=E5=80=BC=E6=98=AFbase64?= MIME-Version: 1.0 Content-Type: text/plain;
charset=UTF-8 Content-Transfer-Encoding: 8bit --- tests/integration_tests/test_audio.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration_tests/test_audio.py b/tests/integration_tests/test_audio.py index aa9bb4c..c6a08ab 100644 --- a/tests/integration_tests/test_audio.py +++ b/tests/integration_tests/test_audio.py @@ -39,7 +39,7 @@ def test_audio_speech_streaming(logging_conf): voice='tongtong', stream=True, response_format='mp3', - encode_format='hex' + encode_format='base64' ) with open("output.pcm", "wb") as f: for item in response: From 3870a281a46fb758f5dbe96ef391943e85f4c336 Mon Sep 17 00:00:00 2001 From: yuhongxiao Date: Fri, 1 Aug 2025 17:02:01 +0800 Subject: [PATCH 3/4] fix: Modify the test cases for text-to-mp3 audio conversion --- tests/integration_tests/test_audio.py | 59 +++++++++++++-------------- 1 file changed, 29 insertions(+), 30 deletions(-) diff --git a/tests/integration_tests/test_audio.py b/tests/integration_tests/test_audio.py index c6a08ab..8140a66 100644 --- a/tests/integration_tests/test_audio.py +++ b/tests/integration_tests/test_audio.py @@ -30,36 +30,35 @@ def test_audio_speech(logging_conf): print(err) def test_audio_speech_streaming(logging_conf): - logging.config.dictConfig(logging_conf) # type: ignore - client = ZhipuAI() # 填写您自己的APIKey - try: - response = client.audio.speech( - model='cogtts', - input='你好呀,欢迎来到智谱开放平台', - voice='tongtong', - stream=True, - response_format='mp3', - encode_format='base64' - ) - with open("output.pcm", "wb") as f: - for item in response: - choice = item.choices[0] - index = choice.index - finish_reason = choice.finish_reason - audio_delta = choice.delta.content - if finish_reason is not None: - break - f.write(base64.b64decode(audio_delta)) - print(f"{index}.finish_reason = {finish_reason}, audio_delta = {len(audio_delta)}") - - except zhipuai.core._errors.APIRequestFailedError as err: - print(err) - except zhipuai.core._errors.APIInternalError as err: - 
print(err) - except zhipuai.core._errors.APIStatusError as err: - print(err) - except Exception as e: - print(e) + logging.config.dictConfig(logging_conf) # type: ignore + client = ZhipuAI() # 填写您自己的APIKey + try: + response = client.audio.speech( + model='cogtts', + input='你好呀,欢迎来到智谱开放平台', + voice='tongtong', + stream=True, + response_format='mp3', + encode_format='hex' + ) + with open("output.mp3", "wb") as f: + for item in response: + choice = item.choices[0] + index = choice.index + finish_reason = choice.finish_reason + audio_delta = choice.delta.content + if finish_reason is not None: + break + f.write(bytes.fromhex(audio_delta)) + print(f"audio delta: {audio_delta[:64]}..., 长度:{len(audio_delta)}") + except zhipuai.core._errors.APIRequestFailedError as err: + print(err) + except zhipuai.core._errors.APIInternalError as err: + print(err) + except zhipuai.core._errors.APIStatusError as err: + print(err) + except Exception as e: + print(e) def test_audio_customization(logging_conf): From ae3e5d3dc1aebbe6d6c74f18d822ea5c4a488415 Mon Sep 17 00:00:00 2001 From: yuhongxiao Date: Fri, 1 Aug 2025 18:37:52 +0800 Subject: [PATCH 4/4] test finished, update version --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index b593b81..492c99e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "zhipuai" -version = "2.1.5.20250726" +version = "2.1.5.20250801" description = "A SDK library for accessing big model apis from ZhipuAI" authors = ["Zhipu AI"] readme = "README.md"