-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathutils.cpp
More file actions
42 lines (39 loc) · 2.13 KB
/
utils.cpp
File metadata and controls
42 lines (39 loc) · 2.13 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
#include "utils.h"
#include <iostream>
/// Returns a copy of `s` in which every character has been passed through
/// std::tolower. The conversion is done one `char` at a time.
std::string to_lower_util(std::string s) {
    for (char& ch : s) {
        // Go through unsigned char first: std::tolower must not be handed a
        // negative value (other than EOF).
        const int lowered = std::tolower(static_cast<unsigned char>(ch));
        ch = static_cast<char>(lowered);
    }
    return s;
}
/// Returns `str` with leading and trailing whitespace removed.
/// Whitespace here means space, tab, newline, carriage return, form feed and
/// vertical tab. A string made up only of those characters (or an empty
/// string) yields an empty result.
std::string trim_string_util(const std::string& str) {
    const char* const blanks = " \t\n\r\f\v";

    const std::string::size_type first = str.find_first_not_of(blanks);
    if (first == std::string::npos) {
        // Nothing but whitespace (or nothing at all).
        return std::string();
    }

    // A non-blank character exists, so this search cannot fail.
    const std::string::size_type last = str.find_last_not_of(blanks);
    const std::string::size_type length = last - first + 1;
    return std::string(str, first, length);
}
/// Removes known speech-to-text artifact markers from `text` and tidies the
/// remaining whitespace.
///
/// Steps, in order:
///   1. Trim the input; an empty result is returned immediately.
///   2. Delete each artifact marker (e.g. "[BLANK_AUDIO]", "(um)", "[music]"),
///      matched case-insensitively. Patterns are applied one after another, so
///      a later pattern sees the output of the earlier ones.
///   3. Trim, collapse every run of two or more spaces into one, trim again.
///
/// A std::regex_error in step 2 or in the space collapsing is reported on
/// std::cerr and that step is abandoned; whatever had been cleaned up to that
/// point is kept and processing continues.
///
/// @param text Raw transcript text.
/// @return The cleaned text (possibly empty).
std::string cleanup_stt_artifacts_util(std::string text) {
    std::string result = trim_string_util(text);
    if (result.empty()) return result;

    try {
        // Every pattern is a constant, so compile each one only once (on the
        // first call) instead of on every invocation. The array order is the
        // order in which the replacements are applied.
        static const std::regex artifact_patterns[] = {
            std::regex("\\[BLANK_AUDIO\\]", std::regex_constants::icase),
            std::regex("\\(sighs\\)", std::regex_constants::icase),
            std::regex("\\[\\s*Silence\\s*\\]", std::regex_constants::icase),
            std::regex("\\(silence\\)", std::regex_constants::icase),
            std::regex("\\(um\\)", std::regex_constants::icase),
            std::regex("\\(uh\\)", std::regex_constants::icase),
            std::regex("\\[noise\\]", std::regex_constants::icase),
            std::regex("\\[Laughter\\]", std::regex_constants::icase),
            std::regex("\\(music\\)", std::regex_constants::icase),
            std::regex("\\[music\\]", std::regex_constants::icase),
        };
        for (const std::regex& pattern : artifact_patterns) {
            result = std::regex_replace(result, pattern, "");
        }
    } catch (const std::regex_error& e) {
        std::cerr << "Regex error during artifact cleaning: " << e.what() << std::endl;
    }

    // Removing a marker at either end can expose whitespace there.
    result = trim_string_util(result);

    try {
        // Removing a marker from the middle leaves the spaces on both sides
        // of it behind; fold such runs back into a single space.
        static const std::regex space_run(" {2,}");
        result = std::regex_replace(result, space_run, " ");
    } catch (const std::regex_error& e) {
        std::cerr << "Regex error during space consolidation: " << e.what() << std::endl;
    }
    return trim_string_util(result);
}