-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathmain.cpp
More file actions
57 lines (47 loc) · 2.78 KB
/
main.cpp
File metadata and controls
57 lines (47 loc) · 2.78 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
#include <chrono>
#include <cstdint>
#include <fstream>
#include <iostream>
#include <stdexcept>
#include <string>
#include <vector>

#include "styletts2.h"
#include "wavfile.hpp"
/// Demo entry point: loads StyleTTS2, synthesizes one fixed sentence, writes
/// the resulting 16-bit samples to "test.wav" and prints timing statistics.
///
/// @return 0 on success; 1 if any step throws a std::exception (including a
///         failure to open or write the output file).
int main() {
    // Sample rate in Hz, shared by the WAV header and the duration computation
    // so the two can never disagree.
    constexpr int kSampleRate = 24000;

    try {
        // Text to synthesize
        const std::string text = "Every time I'm done sorting my album and look out the window, I always want to take another picture... Even though I've seen the view a thousand times, I still feel happy looking at the stars like this.";

        // Paths configuration
        const std::string modelDir = "trained_models";
        const std::string outputPath = "test.wav";

        // Open the output first so an unwritable destination is reported
        // before the model-loading and synthesis steps run.
        std::ofstream audioFile(outputPath, std::ios::binary);
        if (!audioFile.is_open()) {
            throw std::runtime_error("Failed to open output file: " + outputPath);
        }

        std::cout << "Initializing StyleTTS2..." << std::endl;

        // Initialize StyleTTS2 (loads all models).
        // NOTE(review): the espeak-ng data path looks like a placeholder and
        // presumably has to be replaced with a real directory — confirm.
        const auto startTime = std::chrono::steady_clock::now();
        StyleTTS2 tts(modelDir, "path-to-espeak/espeak-ng/share/espeak-ng-data", false); // false = use CPU, true = use CUDA
        tts.LoadStyle(modelDir + "/ref_s.bin", modelDir + "/ref_p.bin");
        const auto initEndTime = std::chrono::steady_clock::now();
        const auto initDuration = std::chrono::duration<double>(initEndTime - startTime);
        std::cout << "Initialization took: " << initDuration.count() << " seconds" << std::endl;

        std::cout << "Synthesizing speech..." << std::endl;

        // Synthesize with default voice (from ref_s.bin and ref_p.bin)
        const auto inferStartTime = std::chrono::steady_clock::now();
        const std::vector<int16_t> audioBuffer = tts.synthesize(text, 1.0f); // speed = 1.0
        const auto inferEndTime = std::chrono::steady_clock::now();
        const auto inferDuration = std::chrono::duration<double>(inferEndTime - inferStartTime);

        // Save to WAV file.
        // NOTE(review): arguments are passed exactly as before; presumably
        // (sample rate, bytes per sample, channels, sample count, stream) —
        // confirm against wavfile.hpp.
        writeWavHeader(kSampleRate, 2, 1, static_cast<int32_t>(audioBuffer.size()), audioFile);
        audioFile.write(reinterpret_cast<const char*>(audioBuffer.data()),
                        static_cast<std::streamsize>(sizeof(int16_t) * audioBuffer.size()));
        audioFile.close();
        // close() sets failbit on error, so this covers header, payload and flush.
        if (!audioFile) {
            throw std::runtime_error("Failed to write output file: " + outputPath);
        }

        const auto endTime = std::chrono::steady_clock::now();
        const auto totalDuration = std::chrono::duration<double>(endTime - startTime);

        // Print statistics
        const double audioDuration = static_cast<double>(audioBuffer.size()) / kSampleRate;
        std::cout << "\n=== Results ===" << std::endl;
        std::cout << "Initialization time: " << initDuration.count() << " seconds" << std::endl;
        std::cout << "Inference time: " << inferDuration.count() << " seconds" << std::endl;
        std::cout << "Total runtime: " << totalDuration.count() << " seconds" << std::endl;
        std::cout << "Audio duration: " << audioDuration << " seconds" << std::endl;
        if (audioDuration > 0.0) {
            std::cout << "Real-time factor: " << inferDuration.count() / audioDuration << "x" << std::endl;
        } else {
            // An empty buffer would otherwise print inf/nan from a division by zero.
            std::cout << "Real-time factor: n/a (no audio produced)" << std::endl;
        }

        std::cout << "\nSynthesis completed successfully!" << std::endl;
    } catch (const std::exception& e) {
        std::cerr << "Error: " << e.what() << std::endl;
        return 1;
    }

    return 0;
}