diff --git a/CMakeLists.txt b/CMakeLists.txt index d3cc801..2e2b752 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -123,7 +123,7 @@ add_subdirectory(asr) add_subdirectory(translate) add_subdirectory(tts) add_subdirectory(pipeline) -if(VOX_BUILD_APPS) +if(VOX_BUILD_APPS OR VOX_BUILD_TESTS) add_subdirectory(apps) endif() diff --git a/README.md b/README.md index 062ba3a..ff18a55 100644 --- a/README.md +++ b/README.md @@ -57,6 +57,19 @@ cmake --build build --target vox -j ## Model +The `vox` CLI can list, download, verify, repair, and remove known local models: + +```sh +./build/bin/vox model list +./build/bin/vox model download qwen3-asr-1.7b +./build/bin/vox model verify qwen3-asr-1.7b +./build/bin/vox model repair qwen3-asr-1.7b +``` + +Model verification checks that expected files exist, are non-empty, and do not +have leftover partial downloads. Checksums are reported when metadata is +available; the current bundled manifests rely on file presence and size. + ### Whisper ASR Download or place a local Whisper GGML model under `models/`. For multilingual recognition, use a non-`.en` model. diff --git a/apps/CMakeLists.txt b/apps/CMakeLists.txt index 58bd912..820c017 100644 --- a/apps/CMakeLists.txt +++ b/apps/CMakeLists.txt @@ -1,3 +1,13 @@ +add_library(vox_model_manager STATIC + model_manager.cpp +) + +target_include_directories(vox_model_manager + PUBLIC + "${CMAKE_CURRENT_SOURCE_DIR}" +) + +if(VOX_BUILD_APPS) add_executable(vox vox.cpp microphone_audio_source.cpp @@ -5,6 +15,7 @@ add_executable(vox target_link_libraries(vox PRIVATE + vox_model_manager vox_translation_pipeline vox_sdl_audio ) @@ -18,3 +29,4 @@ target_compile_definitions(vox PRIVATE VOX_PROJECT_ROOT="${CMAKE_SOURCE_DIR}" ) +endif() diff --git a/apps/model_manager.cpp b/apps/model_manager.cpp new file mode 100644 index 0000000..7e60a84 --- /dev/null +++ b/apps/model_manager.cpp @@ -0,0 +1,338 @@ +#include "model_manager.h" + +#include +#include +#include +#include +#include + +namespace vox::app::model { +namespace { + +std::filesystem::path model_path(const std::filesystem::path & project_root, const std::string & relative_path) { + return project_root / std::filesystem::path(relative_path); +} + +std::string format_size(uintmax_t bytes) { + const char * units[] = {"B", "KiB", "MiB", "GiB"}; + double value = static_cast(bytes); + size_t unit = 0; + while (value >= 1024.0 && unit + 1 < std::size(units)) { + value /= 1024.0; + ++unit; + } + char buffer[64]; + if (unit == 0) { + std::snprintf(buffer, sizeof(buffer), "%llu %s", static_cast(bytes), units[unit]); + } else { + std::snprintf(buffer, sizeof(buffer), "%.1f %s", value, units[unit]); + } + return buffer; +} + +const char * status_name(const ManagedModelStatus & status) { + if (status.complete) { + return "installed"; + } + if (status.installed || status.has_partial_download) { + return "incomplete"; + } + return "missing"; +} + +void print_model_usage(std::ostream & out) { + out << "usage: vox model [model-name]\n" + << "\n" + << "commands:\n" + << " list [--installed] list supported models and local status\n" + << " download download a supported model\n" + << " verify verify local model files exist and are non-empty\n" + << " repair remove incomplete files, then download missing files\n" + << " remove remove local files for a model\n" + << "\n" + << "Run 'vox model list' to see model names.\n"; +} + +int run_download_command(const ManagedModel & model, const std::filesystem::path & project_root, std::ostream & err) { + if (model.download_command.empty()) { + err << "No download command is available for " << model.name << ".\n"; + return 1; + } + + std::error_code ec; + const std::filesystem::path previous = std::filesystem::current_path(ec); + if (ec) { + err << "Could not read current directory: " << ec.message() << "\n"; + return 1; + } + std::filesystem::current_path(project_root, ec); + if (ec) { + err << "Could not enter project directory " << project_root << ": " << ec.message() << "\n"; + return 1; + } + const int result = std::system(model.download_command.c_str()); + std::error_code restore_ec; + std::filesystem::current_path(previous, restore_ec); + if (restore_ec) { + err << "Could not restore working directory " << previous << ": " << restore_ec.message() << "\n"; + if (result != 0) { + err << "Download command failed for " << model.name << ".\n"; + } + return 1; + } + if (result != 0) { + err << "Download command failed for " << model.name << ".\n"; + return 1; + } + return 0; +} + +void print_model_details(const ManagedModel & model, const ManagedModelStatus & status, std::ostream & out) { + out << model.name << " (" << status_name(status) << ")\n" + << " description: " << model.description << "\n" + << " source: " << model.source << "\n" + << " version: " << model.version << "\n" + << " checksum: " << (model.checksum.empty() ? "unavailable" : model.checksum) << "\n"; + for (size_t i = 0; i < model.files.size(); ++i) { + const ManagedModelFile & file = model.files[i]; + const ManagedModelFileStatus & file_status = status.files[i]; + out << " file: " << file_status.path.string() + << " [" << (file_status.complete ? "ok" : (file_status.exists ? "empty" : "missing")) << "]"; + if (file_status.exists) { + out << " size=" << format_size(file_status.size); + } else if (!file.size_hint.empty()) { + out << " expected=" << file.size_hint; + } + out << "\n"; + } + if (status.has_partial_download) { + out << " partial download detected; run 'vox model repair " << model.name << "'.\n"; + } +} + +int require_model_name(const std::vector & args, std::ostream & err) { + if (args.size() < 2) { + err << "Missing model name.\n"; + return 1; + } + if (args.size() > 2) { + err << "Too many arguments for model command.\n"; + return 1; + } + return 0; +} + +} // namespace + +const std::vector & supported_models() { + static const std::vector models = { + { + "qwen3-asr-1.7b", + "Default Qwen3-ASR GGUF model and multimodal projector", + "ggml-org/Qwen3-ASR-1.7B-GGUF", + "1.7B Q8_0", + "", + { + {"models/asr/qwen3-asr-1.7b/Qwen3-ASR-1.7B-Q8_0.gguf", "about 2.17 GB"}, + {"models/asr/qwen3-asr-1.7b/mmproj-Qwen3-ASR-1.7B-Q8_0.gguf", "about 356 MB"}, + }, + "scripts/download-qwen3-asr-gguf.sh", + }, + { + "qwen3-asr-0.6b", + "Smaller Qwen3-ASR GGUF model and multimodal projector", + "ggml-org/Qwen3-ASR-0.6B-GGUF", + "0.6B Q8_0", + "", + { + {"models/asr/qwen3-asr-0.6b/Qwen3-ASR-0.6B-Q8_0.gguf", ""}, + {"models/asr/qwen3-asr-0.6b/mmproj-Qwen3-ASR-0.6B-Q8_0.gguf", ""}, + }, + "scripts/download-qwen3-asr-gguf.sh 0.6B Q8_0 models/asr/qwen3-asr-0.6b", + }, + { + "whisper-base", + "Whisper.cpp base GGML ASR model", + "ggerganov/whisper.cpp", + "base", + "", + {{"models/ggml-base.bin", ""}}, + "./external/whisper.cpp/models/download-ggml-model.sh base models", + }, + { + "hymt-translate", + "Tencent HY-MT1.5 translation GGUF model", + "tencent/HY-MT1.5-1.8B-GGUF", + "1.8B Q4_K_M", + "", + {{"models/translate/HY-MT1.5-1.8B-Q4_K_M.gguf", "about 1.13 GB"}}, + "scripts/download-hymt-gguf.sh", + }, + { + "cosyvoice3-tts", + "Minimum baked-voice CosyVoice3 TTS GGUF set", + "cstr/cosyvoice3-0.5b-2512-GGUF", + "q4_k/q8_0/f16", + "", + { + {"models/tts/cosyvoice3/cosyvoice3-llm-q4_k.gguf", ""}, + {"models/tts/cosyvoice3/cosyvoice3-flow-q8_0.gguf", ""}, + {"models/tts/cosyvoice3/cosyvoice3-hift-f16.gguf", ""}, + {"models/tts/cosyvoice3/cosyvoice3-voices.gguf", ""}, + }, + "scripts/download-cosyvoice3-tts-gguf.sh", + }, + }; + return models; +} + +const ManagedModel * find_model(const std::string & name) { + for (const ManagedModel & model : supported_models()) { + if (model.name == name) { + return &model; + } + } + return nullptr; +} + +ManagedModelStatus inspect_model(const ManagedModel & model, const std::filesystem::path & project_root) { + ManagedModelStatus status; + status.model = &model; + status.complete = true; + for (const ManagedModelFile & file : model.files) { + ManagedModelFileStatus file_status; + file_status.path = model_path(project_root, file.relative_path); + + std::error_code ec; + file_status.exists = std::filesystem::is_regular_file(file_status.path, ec); + if (file_status.exists) { + file_status.size = std::filesystem::file_size(file_status.path, ec); + if (ec) { + file_status.size = 0; + } + file_status.complete = file_status.size > 0; + status.installed = true; + } + status.complete = status.complete && file_status.complete; + + const std::filesystem::path partial = file_status.path.string() + ".part"; + if (std::filesystem::exists(partial, ec)) { + status.has_partial_download = true; + } + status.files.push_back(file_status); + } + status.complete = status.complete && !status.has_partial_download; + return status; +} + +int run_model_command( + const std::vector & args, + const std::filesystem::path & project_root, + std::ostream & out, + std::ostream & err) { + if (args.empty() || args[0] == "-h" || args[0] == "--help" || args[0] == "help") { + print_model_usage(out); + return 0; + } + + const std::string command = args[0]; + if (command == "list") { + bool installed_only = false; + if (args.size() > 2 || (args.size() == 2 && args[1] != "--installed")) { + err << "usage: vox model list [--installed]\n"; + return 1; + } + installed_only = args.size() == 2; + for (const ManagedModel & model : supported_models()) { + const ManagedModelStatus status = inspect_model(model, project_root); + if (installed_only && !status.installed && !status.has_partial_download) { + continue; + } + out << model.name << "\t" << status_name(status) << "\t" << model.version << "\t"; + if (!model.files.empty()) { + out << model_path(project_root, model.files.front().relative_path).string(); + } + out << "\n"; + } + return 0; + } + + if (command != "download" && command != "verify" && command != "repair" && command != "remove") { + err << "Unknown model command: " << command << "\n"; + print_model_usage(err); + return 1; + } + if (require_model_name(args, err) != 0) { + return 1; + } + + const ManagedModel * model = find_model(args[1]); + if (!model) { + err << "Unknown model: " << args[1] << "\n" + << "Run 'vox model list' to see supported models.\n"; + return 1; + } + + if (command == "verify") { + const ManagedModelStatus status = inspect_model(*model, project_root); + print_model_details(*model, status, out); + if (!status.complete) { + err << "Model is missing or incomplete. Run 'vox model repair " << model->name << "'.\n"; + return 1; + } + return 0; + } + + if (command == "remove") { + std::error_code ec; + for (const ManagedModelFile & file : model->files) { + const std::filesystem::path path = model_path(project_root, file.relative_path); + const bool removed = std::filesystem::remove(path, ec); + if (ec) { + err << "Could not remove " << path << ": " << ec.message() << "\n"; + return 1; + } + std::filesystem::remove(path.string() + ".part", ec); + out << (removed ? "Removed: " : "Not installed: ") << path.string() << "\n"; + } + return 0; + } + + if (command == "repair") { + std::error_code ec; + const ManagedModelStatus status = inspect_model(*model, project_root); + for (size_t i = 0; i < model->files.size(); ++i) { + const std::filesystem::path path = status.files[i].path; + if (status.files[i].exists && !status.files[i].complete) { + std::filesystem::remove(path, ec); + if (ec) { + err << "Could not remove incomplete file " << path << ": " << ec.message() << "\n"; + return 1; + } + out << "Removed incomplete file: " << path.string() << "\n"; + } + std::filesystem::remove(path.string() + ".part", ec); + } + } + + const ManagedModelStatus before = inspect_model(*model, project_root); + if (before.complete) { + out << "Model already installed: " << model->name << "\n"; + return 0; + } + out << "Downloading " << model->name << " using: " << model->download_command << "\n"; + const int download_result = run_download_command(*model, project_root, err); + if (download_result != 0) { + return download_result; + } + const ManagedModelStatus after = inspect_model(*model, project_root); + if (!after.complete) { + err << "Model is still incomplete after download: " << model->name + << " (" << status_name(after) << "). Run 'vox model repair " << model->name << "'.\n"; + return 1; + } + out << "Model installed: " << model->name << "\n"; + return 0; +} + +} // namespace vox::app::model diff --git a/apps/model_manager.h b/apps/model_manager.h new file mode 100644 index 0000000..0641f0c --- /dev/null +++ b/apps/model_manager.h @@ -0,0 +1,50 @@ +#pragma once + +#include +#include +#include +#include +#include + +namespace vox::app::model { + +struct ManagedModelFile { + std::string relative_path; + std::string size_hint; +}; + +struct ManagedModel { + std::string name; + std::string description; + std::string source; + std::string version; + std::string checksum; + std::vector files; + std::string download_command; +}; + +struct ManagedModelFileStatus { + std::filesystem::path path; + bool exists = false; + bool complete = false; + uintmax_t size = 0; +}; + +struct ManagedModelStatus { + const ManagedModel * model = nullptr; + std::vector files; + bool has_partial_download = false; + bool installed = false; + bool complete = false; +}; + +const std::vector & supported_models(); +const ManagedModel * find_model(const std::string & name); +ManagedModelStatus inspect_model(const ManagedModel & model, const std::filesystem::path & project_root); +int run_model_command( + const std::vector & args, + const std::filesystem::path & project_root, + std::ostream & out, + std::ostream & err); + +} // namespace vox::app::model diff --git a/apps/vox.cpp b/apps/vox.cpp index f92ca88..6dac04d 100644 --- a/apps/vox.cpp +++ b/apps/vox.cpp @@ -1,6 +1,7 @@ #include "async_transcript_translator.h" #include "async_text_to_speech.h" #include "microphone_audio_source.h" +#include "model_manager.h" #include "streaming_qwen_asr.h" #include "streaming_whisper.h" @@ -346,6 +347,10 @@ CliOptions parse_cli(int argc, char ** argv) { void print_usage(const char * program) { std::cout << "usage: " << program << " [options] [asr_model] [language] [translation_model] [target_language]\n" + << " " << program << " model [model-name]\n" + << "\n" + << "commands:\n" + << " model list, download, verify, repair, or remove local models\n" << "\n" << "options:\n" << " --asr-engine NAME\n" @@ -449,6 +454,14 @@ int main(int argc, char ** argv) { std::signal(SIGTERM, stop); try { + if (argc >= 2 && std::string(argv[1]) == "model") { + std::vector args; + for (int i = 2; i < argc; ++i) { + args.push_back(argv[i]); + } + return vox::app::model::run_model_command(args, VOX_PROJECT_ROOT, std::cout, std::cerr); + } + const CliOptions cli = parse_cli(argc, argv); if (cli.show_help) { print_usage(argv[0]); @@ -499,9 +512,9 @@ int main(int argc, char ** argv) { if (!file_exists(common_config.model_path)) { std::cerr << "Missing ASR model: " << common_config.model_path << "\n"; if (asr_engine == AsrEngine::Whisper) { - std::cerr << "Download: ./external/whisper.cpp/models/download-ggml-model.sh base models\n"; + std::cerr << "Download: vox model download whisper-base\n"; } else { - std::cerr << "Download: scripts/download-qwen3-asr-gguf.sh\n"; + std::cerr << "Download: vox model download qwen3-asr-1.7b\n"; } return 1; } @@ -512,7 +525,7 @@ int main(int argc, char ** argv) { !cli.asr_mmproj_path.empty() ? cli.asr_mmproj_path : default_qwen_mmproj); if (!file_exists(qwen_mmproj_path)) { std::cerr << "Missing Qwen3-ASR mmproj: " << qwen_mmproj_path << "\n" - << "Download: scripts/download-qwen3-asr-gguf.sh\n"; + << "Download: vox model download qwen3-asr-1.7b\n"; return 1; } } @@ -543,7 +556,7 @@ int main(int argc, char ** argv) { cli.positional.size() > 2 ? resolve_model_path(cli.positional[2]) : std::string(); if (!translation_model_path.empty() && !file_exists(translation_model_path)) { std::cerr << "Missing translation model: " << translation_model_path << "\n" - << "Download: scripts/download-hymt-gguf.sh\n"; + << "Download: vox model download hymt-translate\n"; return 1; } @@ -564,7 +577,7 @@ int main(int argc, char ** argv) { tts_model_path = resolve_model_path(cli.tts_model_path); if (!file_exists(tts_model_path)) { std::cerr << "Missing TTS model: " << tts_model_path << "\n" - << "Download: scripts/download-cosyvoice3-tts-gguf.sh\n"; + << "Download: vox model download cosyvoice3-tts\n"; return 1; } diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index ea3b3da..9140aaa 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -27,6 +27,17 @@ target_link_libraries(vox_qwen_asr_config_test add_test(NAME vox_qwen_asr_config_test COMMAND vox_qwen_asr_config_test) +add_executable(vox_model_manager_test + model_manager_test.cpp +) + +target_link_libraries(vox_model_manager_test + PRIVATE + vox_model_manager +) + +add_test(NAME vox_model_manager_test COMMAND vox_model_manager_test) + add_executable(vox_qwen_asr_smoke_test qwen_asr_smoke_test.cpp ) diff --git a/tests/model_manager_test.cpp b/tests/model_manager_test.cpp new file mode 100644 index 0000000..a636663 --- /dev/null +++ b/tests/model_manager_test.cpp @@ -0,0 +1,118 @@ +#include "model_manager.h" + +#include +#include +#include +#include +#include +#include + +namespace { + +bool expect(bool condition, const std::string & message) { + if (!condition) { + std::cerr << message << "\n"; + return false; + } + return true; +} + +} // namespace + +int main() { + namespace fs = std::filesystem; + + bool ok = true; + const auto now = std::chrono::steady_clock::now().time_since_epoch().count(); + const fs::path root = fs::temp_directory_path() / ("vox_model_manager_test_" + std::to_string(now)); + std::error_code ec; + fs::remove_all(root, ec); + fs::create_directories(root / "models", ec); + + const vox::app::model::ManagedModel * model = vox::app::model::find_model("whisper-base"); + ok = expect(model != nullptr, "whisper-base should be supported") && ok; + ok = expect(vox::app::model::find_model("missing-model") == nullptr, "unknown model should not resolve") && ok; + if (!model) { + return 1; + } + + vox::app::model::ManagedModelStatus status = vox::app::model::inspect_model(*model, root); + ok = expect(!status.installed && !status.complete, "missing model should not be complete") && ok; + + const fs::path model_path = root / "models/ggml-base.bin"; + { + std::ofstream empty(model_path, std::ios::binary); + } + status = vox::app::model::inspect_model(*model, root); + ok = expect(status.installed && !status.complete, "empty model file should be incomplete") && ok; + + std::ostringstream out; + std::ostringstream err; + int result = vox::app::model::run_model_command({"list", "--installed"}, root, out, err); + ok = expect(result == 0, "list --installed should succeed for incomplete models") && ok; + ok = expect(out.str().find("whisper-base") != std::string::npos, "installed list should include incomplete model") && ok; + + { + std::ofstream file(model_path, std::ios::binary); + file << "not a real model, but enough to test file completeness"; + } + status = vox::app::model::inspect_model(*model, root); + ok = expect(status.installed && status.complete, "non-empty model file should be complete") && ok; + + out.str(""); + out.clear(); + err.str(""); + err.clear(); + result = vox::app::model::run_model_command({"verify", "whisper-base"}, root, out, err); + ok = expect(result == 0, "verify should succeed for a complete local model") && ok; + ok = expect(out.str().find("checksum: unavailable") != std::string::npos, "verify should show checksum status") && ok; + + out.str(""); + out.clear(); + err.str(""); + err.clear(); + result = vox::app::model::run_model_command({"list", "--installed"}, root, out, err); + ok = expect(result == 0, "list --installed should succeed") && ok; + ok = expect(out.str().find("whisper-base") != std::string::npos, "installed list should include complete model") && ok; + + out.str(""); + out.clear(); + err.str(""); + err.clear(); + result = vox::app::model::run_model_command({"remove", "whisper-base"}, root, out, err); + ok = expect(result == 0, "remove should succeed") && ok; + ok = expect(!fs::exists(model_path), "remove should delete model file") && ok; + + { + std::ofstream partial(model_path.string() + ".part", std::ios::binary); + partial << "partial"; + } + status = vox::app::model::inspect_model(*model, root); + ok = expect(!status.installed && status.has_partial_download && !status.complete, "partial download should be incomplete") && ok; + out.str(""); + out.clear(); + err.str(""); + err.clear(); + result = vox::app::model::run_model_command({"list", "--installed"}, root, out, err); + ok = expect(result == 0, "list --installed should succeed for partial downloads") && ok; + ok = expect(out.str().find("whisper-base") != std::string::npos, "installed list should include partial download") && ok; + + { + std::ofstream file(model_path, std::ios::binary); + file << "not a real model, but enough to test file completeness"; + } + const fs::path partial_path = model_path.string() + ".part"; + ok = expect(fs::exists(partial_path), "stale .part file should exist before repair") && ok; + out.str(""); + out.clear(); + err.str(""); + err.clear(); + result = vox::app::model::run_model_command({"repair", "whisper-base"}, root, out, err); + ok = expect(result == 0, "repair should succeed when model is already complete") && ok; + ok = expect(!fs::exists(partial_path), "repair should remove stale .part file") && ok; + ok = expect(fs::exists(model_path), "repair should keep the complete model file") && ok; + ok = expect(out.str().find("Model already installed") != std::string::npos, "repair should report model already installed") && ok; + + fs::remove_all(root, ec); + return ok ? 0 : 1; +}