Skip to content

Commit fce8858

Browse files
authored
Add unit tests for the audio and vision encode functions (#489)
## Summary <!-- Include a short paragraph of the changes introduced in this PR. If this PR requires additional context or rationale, explain why the changes are necessary. --> pytest tests/unit/extras/test_vision.py pytest tests/unit/extras/test_audio.py ## Details <!-- Provide a detailed list of all changes introduced in this pull request. --> - [ ] ## Test Plan <!-- List the steps needed to test this PR. --> - ## Related Issues <!-- Link any relevant issues that this PR addresses. --> - Resolves # --- - [ ] "I certify that all code in this PR is my own, except as noted below." ## Use of AI - [ ] Includes AI-assisted code completion - [ ] Includes code generated by an AI application - [ ] Includes AI-generated tests (NOTE: AI written tests should have a docstring that includes `## WRITTEN BY AI ##`)
2 parents d527257 + 2600d0e commit fce8858

File tree

4 files changed

+507
-0
lines changed

4 files changed

+507
-0
lines changed

.github/actions/run-tox/action.yml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,10 @@ runs:
1717
uses: pdm-project/setup-pdm@v4
1818
with:
1919
python-version: ${{ inputs.python-version }}
20+
- name: Install system dependencies
  # NOTE(review): ffmpeg is presumably needed by the audio/vision encode
  # tests added in this commit -- confirm against the test requirements.
  # Refresh the package index first so the install does not fail on a stale
  # runner image, and use apt-get, whose CLI is stable for scripted use.
  run: |
    sudo apt-get update
    sudo apt-get install -y ffmpeg
  shell: bash
2024
- name: Install dependencies
2125
run: |
2226
pip install tox tox-pdm

tests/unit/extras/__init__.py

Whitespace-only changes.

tests/unit/extras/test_audio.py

Lines changed: 212 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,212 @@
1+
import tempfile
2+
import wave
3+
from pathlib import Path
4+
from unittest.mock import MagicMock, patch
5+
6+
import numpy as np
7+
import pytest
8+
import torch
9+
10+
from guidellm.extras.audio import encode_audio
11+
12+
13+
@pytest.fixture
def sample_audio_tensor():
    """Provide a 440 Hz sine tone as a ``(1, 16000)`` float tensor.

    The tone spans the interval [0, 1] in 16000 evenly spaced steps and is
    scaled to an amplitude of 0.3.
    """
    rate_hz = 16000
    tone_hz = 440
    timeline = torch.linspace(0, 1, rate_hz)
    waveform = torch.sin(2 * np.pi * tone_hz * timeline)
    # Add a leading axis of size one in front of the sample axis.
    return 0.3 * waveform.unsqueeze(0)
18+
19+
20+
@pytest.fixture
def sample_wav_file(sample_audio_tensor):
    """Yield the path of a temporary ``.wav`` file holding placeholder bytes.

    The payload is the literal ``b"fake_wav_content"``; it is not valid WAV
    data. The file is deleted again when the fixture is torn down.

    ``sample_audio_tensor`` is requested but not used here; it is kept so the
    fixture's signature stays unchanged.
    """
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
        f.write(b"fake_wav_content")
        temp_path = Path(f.name)

    # The handle is closed before yielding so the buffered bytes are on disk
    # and the consumer can reopen the path on every platform.
    yield temp_path

    # EAFP cleanup: no exists()/unlink() race if the test already removed it.
    temp_path.unlink(missing_ok=True)
29+
30+
31+
@pytest.fixture
def real_wav_file():
    """Yield the path of a temporary mono 16-bit PCM WAV file.

    The file contains a 440 Hz sine tone written at a 16000 Hz frame rate
    (16000 frames). It is deleted when the fixture is torn down, including
    when writing the audio fails.
    """
    sample_rate = 16000
    duration = 1.0
    t = np.linspace(0, duration, int(sample_rate * duration))
    # Scale to the int16 range expected for a sample width of 2 bytes.
    audio_data = (np.sin(2 * np.pi * 440 * t) * 32767).astype(np.int16)

    # Only reserve a unique path here. The handle must be closed before the
    # file is reopened by name: reopening a still-open NamedTemporaryFile is
    # not portable (it fails on Windows).
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
        temp_path = Path(f.name)

    try:
        with wave.open(str(temp_path), "wb") as wav_file:
            wav_file.setnchannels(1)
            wav_file.setsampwidth(2)
            wav_file.setframerate(sample_rate)
            wav_file.writeframes(audio_data.tobytes())

        yield temp_path
    finally:
        # Runs on teardown and also if writing the WAV data raised.
        temp_path.unlink(missing_ok=True)
51+
52+
53+
def test_encode_audio_with_tensor_input(sample_audio_tensor):
    """Check the result of encoding a tensor to MP3 without base64.

    Verifies the metadata fields, that the payload is ``bytes``, and that the
    reported byte count is a positive ``int``.
    """
    options = {
        "sample_rate": 16000,
        "audio_format": "mp3",
        "bitrate": "64k",
        "b64encode": False,
    }
    encoded = encode_audio(audio=sample_audio_tensor, **options)

    expected_metadata = {
        "type": "audio_file",
        "format": "mp3",
        "mimetype": "audio/mp3",
        "audio_samples": 16000,
        "audio_seconds": 1.0,
    }
    for field, expected in expected_metadata.items():
        assert encoded[field] == expected

    assert isinstance(encoded["audio"], bytes)

    payload_size = encoded["audio_bytes"]
    assert isinstance(payload_size, int)
    assert payload_size > 0
70+
71+
72+
def test_encode_audio_with_base64(sample_audio_tensor):
    """Check that ``b64encode=True`` yields a valid, non-empty base64 string.

    The result must be typed ``audio_base64`` and its ``audio`` field must be
    a ``str`` that decodes under strict base64 validation.
    """
    import base64

    result = encode_audio(audio=sample_audio_tensor, sample_rate=16000, b64encode=True)

    assert result["type"] == "audio_base64"
    assert isinstance(result["audio"], str)

    try:
        # validate=True rejects characters outside the base64 alphabet;
        # without it b64decode silently discards them and cannot detect a
        # corrupted payload.
        decoded = base64.b64decode(result["audio"], validate=True)
    except ValueError as err:  # binascii.Error is a ValueError subclass
        pytest.fail(f"Invalid base64 encoding: {err}")

    assert len(decoded) > 0
84+
85+
86+
def test_encode_audio_with_numpy_array(sample_audio_tensor):
    """Check that a NumPy array is accepted as the ``audio`` argument.

    The array is obtained from the tensor fixture; the result must be a
    non-empty ``audio_file`` payload of type ``bytes``.
    """
    as_array = sample_audio_tensor.numpy()
    encoded = encode_audio(audio=as_array, sample_rate=16000)

    payload = encoded["audio"]
    assert encoded["type"] == "audio_file"
    assert isinstance(payload, bytes)
    assert encoded["audio_bytes"] > 0
94+
95+
96+
def test_encode_audio_with_real_file_path(real_wav_file):
    """Check encoding when ``audio`` is a path to an on-disk WAV file.

    Verifies the default MP3 metadata, that ``file_name`` is the base name of
    the input path, and that the duration does not exceed ``max_duration``.
    """
    duration_cap = 1.0
    encoded = encode_audio(
        audio=real_wav_file,
        sample_rate=16000,
        max_duration=duration_cap,
    )

    assert encoded["type"] == "audio_file"
    assert isinstance(encoded["audio"], bytes)

    for field, expected in (("format", "mp3"), ("mimetype", "audio/mp3")):
        assert encoded[field] == expected

    expected_name = Path(real_wav_file).name
    assert encoded["file_name"] == expected_name
    assert encoded["audio_bytes"] > 0
    assert encoded["audio_seconds"] <= duration_cap
106+
107+
108+
def test_encode_audio_with_dict_input_complete():
    """Check encoding when ``audio`` is a dict with ``data`` and ``sample_rate``.

    No separate ``sample_rate`` argument is passed; the dict carries a random
    ``(1, 16000)`` tensor together with its rate.
    """
    rate_hz = 16000
    payload = {"data": torch.randn(1, rate_hz), "sample_rate": rate_hz}

    encoded = encode_audio(audio=payload)

    assert encoded["type"] == "audio_file"
    assert encoded["audio_bytes"] > 0
    assert encoded["audio_samples"] == 16000
    assert encoded["audio_seconds"] == 1.0
117+
118+
119+
@patch("httpx.get")
@patch("guidellm.extras.audio._encode_audio")
def test_encode_audio_with_url(
    mock_encode_audio, mock_http_get, sample_audio_tensor
):
    """Check that a URL string is accepted as the ``audio`` argument.

    ``httpx.get``, ``_decode_audio`` and ``_encode_audio`` are all mocked, so
    no network access or real encoding takes place.

    Stacked ``@patch`` decorators inject their mocks bottom-up, so the first
    positional parameter is the ``_encode_audio`` mock and the second is the
    ``httpx.get`` mock. Fixtures must follow the injected mocks; pytest
    supplies them by name.
    """
    # mock http get response
    mock_response = MagicMock()
    mock_response.content = b"fake_audio_content"
    mock_response.raise_for_status = MagicMock()
    mock_http_get.return_value = mock_response

    # mock_encode_audio keeps its default MagicMock return value, which is
    # what the call under test receives from the patched encoder.

    # mock decode - return sample audio tensor
    with patch("guidellm.extras.audio._decode_audio") as mock_decoder:
        mock_audio_result = MagicMock()
        mock_audio_result.data = sample_audio_tensor
        mock_audio_result.sample_rate = 16000
        mock_decoder.return_value = mock_audio_result

        result = encode_audio(audio="https://example.com/audio.wav", sample_rate=16000)

    assert result["type"] == "audio_file"
137+
138+
139+
def test_encode_audio_with_max_duration(sample_audio_tensor):
    """Check that ``max_duration`` caps the reported duration.

    A random ``(1, 32000)`` tensor is encoded at a 16000 Hz sample rate with
    ``max_duration=1.0``; the result must report exactly 1.0 seconds.
    """
    rate_hz = 16000
    two_seconds = torch.randn(1, 32000)

    encoded = encode_audio(audio=two_seconds, sample_rate=rate_hz, max_duration=1.0)

    assert encoded["audio_seconds"] == 1.0
145+
146+
147+
def test_encode_audio_different_formats(sample_audio_tensor):
    """Check the ``mp3``, ``wav`` and ``flac`` output formats in turn.

    For each format the result must echo the format name, report the
    ``audio/<format>`` mimetype, and contain a non-empty payload.
    """
    for fmt in ("mp3", "wav", "flac"):
        encoded = encode_audio(
            audio=sample_audio_tensor,
            sample_rate=16000,
            audio_format=fmt,
        )

        expected_mimetype = f"audio/{fmt}"
        assert encoded["format"] == fmt
        assert encoded["mimetype"] == expected_mimetype
        assert encoded["audio_bytes"] > 0
158+
159+
160+
def test_encode_audio_resampling(sample_audio_tensor):
    """Check that encoding with a different ``encode_sample_rate`` succeeds.

    The input is declared as 16000 Hz and encoded at 8000 Hz. Only the
    presence of the ``audio_samples`` key is asserted.
    """
    rates = {"sample_rate": 16000, "encode_sample_rate": 8000}

    encoded = encode_audio(audio=sample_audio_tensor, **rates)

    assert "audio_samples" in encoded
171+
172+
173+
def test_encode_audio_error_handling():
    """Check that invalid inputs raise ``ValueError``.

    Covered cases: an integer as audio, a tensor with ``sample_rate=None``,
    and a path string that does not exist.
    """
    invalid_calls = (
        {"audio": 123},
        {"audio": torch.randn(1, 16000), "sample_rate": None},
        {"audio": "/nonexistent/path/audio.wav"},
    )

    for bad_kwargs in invalid_calls:
        with pytest.raises(ValueError):
            encode_audio(**bad_kwargs)
182+
183+
184+
def test_audio_quality_preservation(sample_audio_tensor):
    """Check that a 128k MP3 encode yields a payload longer than 1000.

    Payload length is the only property asserted here.
    """
    settings = {"sample_rate": 16000, "audio_format": "mp3", "bitrate": "128k"}

    encoded = encode_audio(audio=sample_audio_tensor, **settings)

    payload_length = len(encoded["audio"])
    assert payload_length > 1000
193+
194+
195+
def test_end_to_end_audio_processing(sample_audio_tensor):
    """Exercise base64 MP3 encoding with a 0.5 s duration cap in one call.

    Verifies the result type and payload type, the echoed format, that
    ``audio_samples`` equals 16000, and that the reported duration does not
    exceed the duration of the input tensor at 16000 Hz.
    """
    source_seconds = sample_audio_tensor.shape[1] / 16000

    settings = {
        "sample_rate": 16000,
        "audio_format": "mp3",
        "bitrate": "64k",
        "b64encode": True,
        "max_duration": 0.5,
    }
    encoded = encode_audio(audio=sample_audio_tensor, **settings)

    assert encoded["type"] == "audio_base64"
    assert isinstance(encoded["audio"], str)
    assert encoded["format"] == "mp3"
    assert encoded["audio_samples"] == 16000
    assert encoded["audio_seconds"] <= source_seconds

0 commit comments

Comments
 (0)