diff --git a/src/BUILD b/src/BUILD index df50004d5b..bac3cf342d 100644 --- a/src/BUILD +++ b/src/BUILD @@ -2485,6 +2485,7 @@ cc_test( "test/kfs_rest_test.cpp", "test/kfs_rest_parser_test.cpp", "test/layout_test.cpp", + "test/libgit2_test.cpp", "test/metric_config_test.cpp", "test/metrics_test.cpp", "test/metrics_flow_test.cpp", diff --git a/src/pull_module/hf_pull_model_module.hpp b/src/pull_module/hf_pull_model_module.hpp index 2742ac23ca..46b92b8b57 100644 --- a/src/pull_module/hf_pull_model_module.hpp +++ b/src/pull_module/hf_pull_model_module.hpp @@ -21,7 +21,7 @@ #include "../capi_frontend/server_settings.hpp" namespace ovms { - +class Libgt2InitGuard; class HfPullModelModule : public Module { protected: HFSettingsImpl hfSettings; @@ -40,4 +40,6 @@ class HfPullModelModule : public Module { static const std::string GIT_SERVER_TIMEOUT_ENV; static const std::string GIT_SSL_CERT_LOCATIONS_ENV; }; + +std::variant> createGuard(); } // namespace ovms diff --git a/src/pull_module/libgit2.cpp b/src/pull_module/libgit2.cpp index 1566805c74..d2985ded17 100644 --- a/src/pull_module/libgit2.cpp +++ b/src/pull_module/libgit2.cpp @@ -15,9 +15,12 @@ //***************************************************************************** #include "libgit2.hpp" +#include #include -#include #include +#include +#include +#include #include #include @@ -45,6 +48,7 @@ #endif namespace ovms { +namespace fs = std::filesystem; // Callback for clone authentication - will be used when password is not set in repo_url // Does not work with LFS download as it requires additional authentication when password is not set in repository url @@ -68,16 +72,16 @@ int cred_acquire_cb(git_credential** out, password = _strdup(username); #endif } else { - fprintf(stderr, "HF_TOKEN env variable is not set.\n"); + fprintf(stderr, "[ERROR] HF_TOKEN env variable is not set.\n"); return -1; } error = git_credential_userpass_plaintext_new(out, username, password); if (error < 0) { - fprintf(stderr, "Creating credentials failed.\n"); + fprintf(stderr, "[ERROR] Creating credentials failed.\n"); error = -1; } } else { - fprintf(stderr, "Only USERPASS_PLAINTEXT supported in OVMS.\n"); + fprintf(stderr, "[ERROR] Only USERPASS_PLAINTEXT supported in OVMS.\n"); return 1; } @@ -179,6 +183,375 @@ Status HfDownloader::RemoveReadonlyFileAttributeFromDir(const std::string& direc return StatusCode::OK; } +class GitRepositoryGuard { +public: + git_repository* repo = nullptr; + int git_error_class = 0; + + GitRepositoryGuard(const std::string& path) { + int error = git_repository_open_ext(&repo, path.c_str(), 0, nullptr); + if (error < 0) { + const git_error* err = git_error_last(); + if (err) { + SPDLOG_ERROR("Repository open failed: {} {}", err->klass, err->message); + git_error_class = err->klass; + } else { + SPDLOG_ERROR("Repository open failed: {}", error); + } + if (repo) + git_repository_free(repo); + } + } + + ~GitRepositoryGuard() { + if (repo) { + git_repository_free(repo); + } + } + + // Allow implicit access to the raw pointer + git_repository* get() const { return repo; } + operator git_repository*() const { return repo; } + + // Non-copyable + GitRepositoryGuard(const GitRepositoryGuard&) = delete; + GitRepositoryGuard& operator=(const GitRepositoryGuard&) = delete; + + // Movable + GitRepositoryGuard(GitRepositoryGuard&& other) noexcept { + repo = other.repo; + other.repo = nullptr; + } + GitRepositoryGuard& operator=(GitRepositoryGuard&& other) noexcept { + if (this != &other) { + if (repo) + git_repository_free(repo); + repo = other.repo; + other.repo = nullptr; + } + return *this; + } +}; + +Status HfDownloader::CheckRepositoryStatus(bool checkUntracked) { + GitRepositoryGuard repoGuard(this->downloadPath); + if (!repoGuard.get()) { + if (repoGuard.git_error_class == 2) + return StatusCode::HF_GIT_STATUS_FAILED_TO_RESOLVE_PATH; + else if (repoGuard.git_error_class == 3) + return StatusCode::HF_GIT_LIGIT2_NOT_INITIALIZED; + else + return StatusCode::HF_GIT_STATUS_FAILED; + } + // HEAD state info + bool is_detached = git_repository_head_detached(repoGuard.get()) == 1; + bool is_unborn = git_repository_head_unborn(repoGuard.get()) == 1; + + // Collect status (staged/unstaged/untracked) + git_status_options opts = GIT_STATUS_OPTIONS_INIT; + + opts.show = GIT_STATUS_SHOW_INDEX_AND_WORKDIR; + opts.flags = GIT_STATUS_OPT_INCLUDE_UNTRACKED // include untracked files // | GIT_STATUS_OPT_RENAMES_HEAD_TO_INDEX // detect renames HEAD->index - not required currently and impacts performance + | GIT_STATUS_OPT_SORT_CASE_SENSITIVELY; + + git_status_list* status_list = nullptr; + int error = git_status_list_new(&status_list, repoGuard.get(), &opts); + if (error != 0) { + return StatusCode::HF_GIT_STATUS_FAILED; + } + + size_t staged = 0, unstaged = 0, untracked = 0, conflicted = 0; + const size_t n = git_status_list_entrycount(status_list); // iterate entries + for (size_t i = 0; i < n; ++i) { + const git_status_entry* e = git_status_byindex(status_list, i); + unsigned s = e->status; + + // Staged (index) changes + if (s & (GIT_STATUS_INDEX_NEW | + GIT_STATUS_INDEX_MODIFIED | + GIT_STATUS_INDEX_DELETED | + GIT_STATUS_INDEX_RENAMED | + GIT_STATUS_INDEX_TYPECHANGE)) + ++staged; + + // Unstaged (workdir) changes + if (s & (GIT_STATUS_WT_MODIFIED | + GIT_STATUS_WT_DELETED | + GIT_STATUS_WT_RENAMED | + GIT_STATUS_WT_TYPECHANGE)) + ++unstaged; + + // Untracked + if (s & GIT_STATUS_WT_NEW) + ++untracked; + + // libgit2 will also flag conflicted entries via status/diff machinery + if (s & GIT_STATUS_CONFLICTED) + ++conflicted; + } + + std::stringstream ss; + ss << "HEAD state : " + << (is_unborn ? "unborn (no commits)" : (is_detached ? "detached" : "attached")) + << "\n"; + ss << "Staged changes : " << staged << "\n"; + ss << "Unstaged changes: " << unstaged << "\n"; + ss << "Untracked files : " << untracked << "\n"; + if (conflicted) + ss << " (" << conflicted << " paths flagged)"; + + SPDLOG_DEBUG(ss.str()); + git_status_list_free(status_list); + + // We do not care about untracked until after git clone + if (is_unborn || is_detached || staged || unstaged || conflicted || (checkUntracked && untracked)) { + return StatusCode::HF_GIT_STATUS_UNCLEAN; + } + return StatusCode::OK; +} + +#define CHECK(call) \ + do { \ + int _err = (call); \ + if (_err < 0) { \ + const git_error* e = git_error_last(); \ + fprintf(stderr, "[ERROR] %d: %s (%s:%d)\n", _err, e && e->message ? e->message : "no message", __FILE__, __LINE__); \ + return; \ + } \ + } while (0) + +// Trim trailing '\r' (for CRLF files) and surrounding spaces +void rtrimCrLfWhitespace(std::string& s) { + if (!s.empty() && s.back() == '\r') + s.pop_back(); // remove trailing '\r' + while (!s.empty() && std::isspace(static_cast(s.back()))) + s.pop_back(); // trailing ws + size_t i = 0; + while (i < s.size() && std::isspace(static_cast(s[i]))) + ++i; // leading ws + if (i > 0) + s.erase(0, i); +} + +// Case-insensitive substring search: returns true if 'needle' is found in 'hay' +bool containsCaseInsensitive(const std::string& hay, const std::string& needle) { + auto toLower = [](std::string v) { + std::transform(v.begin(), v.end(), v.begin(), + [](unsigned char c) { return static_cast(std::tolower(c)); }); + return v; + }; + std::string hayLower = toLower(hay); + std::string needleLower = toLower(needle); + return hayLower.find(needleLower) != std::string::npos; +} + +// Read at most the first 3 lines of a file, with a per-line cap to avoid huge reads. +// Returns true if successful (even if <3 lines exist; vector will just be shorter). + +bool readFirstThreeLines(const std::filesystem::path& p, std::vector& out) { + out.clear(); + + std::ifstream in(p, std::ios::binary); + if (!in) + return false; + + std::string line; + line.reserve(256); // small optimization + int c; + + while (out.size() < 3 && (c = in.get()) != EOF) { + if (c == '\r') { + // Handle CR or CRLF as one line ending + int next = in.peek(); + if (next == '\n') { + in.get(); // consume '\n' + } + // finalize current line + rtrimCrLfWhitespace(line); + out.push_back(std::move(line)); + line.clear(); + } else if (c == '\n') { + // LF line ending + rtrimCrLfWhitespace(line); + out.push_back(std::move(line)); + line.clear(); + } else { + line.push_back(static_cast(c)); + } + } + + // Handle the last line if file did not end with EOL + if (!line.empty() && out.size() < 3) { + rtrimCrLfWhitespace(line); + out.push_back(std::move(line)); + } + + return true; +} + +// Check if the first 3 lines contain required keywords in positional order: +// line1 -> "version", line2 -> "oid", line3 -> "size" (case-insensitive). +bool fileHasLfsKeywordsFirst3Positional(const fs::path& p) { + std::error_code ec; + if (!fs::is_regular_file(p, ec)) + return false; + + std::vector lines; + if (!readFirstThreeLines(p, lines)) + return false; + + if (lines.size() < 3) + return false; + + return containsCaseInsensitive(lines[0], "version") && + containsCaseInsensitive(lines[1], "oid") && + containsCaseInsensitive(lines[2], "size"); +} + +// Helper: make path relative to base (best-effort, non-throwing). +fs::path makeRelativeToBase(const fs::path& path, const fs::path& base) { + std::error_code ec; + // Try fs::relative first (handles canonical comparisons, may fail if on different roots) + fs::path rel = fs::relative(path, base, ec); + if (!ec && !rel.empty()) + return rel; + + // Fallback: purely lexical relative (doesn't access filesystem) + rel = path.lexically_relative(base); + if (!rel.empty()) + return rel; + + // Last resort: return filename only (better than absolute when nothing else works) + if (path.has_filename()) + return path.filename(); + return path; +} + +// Find all files under 'directory' that satisfy the first-3-lines LFS keyword check. Default: bool recursive = true +std::vector findLfsLikeFiles(const std::string& directory, bool recursive) { + std::vector matches; + std::error_code ec; + + if (!fs::exists(directory, ec) || !fs::is_directory(directory, ec)) { + return matches; + } + + if (recursive) { + for (fs::recursive_directory_iterator it(directory, ec), end; !ec && it != end; ++it) { + const auto& p = it->path(); + if (fileHasLfsKeywordsFirst3Positional(p)) { + matches.push_back(makeRelativeToBase(p, directory)); + } + } + } else { + for (fs::directory_iterator it(directory, ec), end; !ec && it != end; ++it) { + const auto& p = it->path(); + if (fileHasLfsKeywordsFirst3Positional(p)) { + matches.push_back(makeRelativeToBase(p, directory)); + } + } + } + return matches; +} + +// pick the right entry pointer type for your libgit2 +#if defined(GIT_LIBGIT2_VER_MAJOR) +// libgit2 ≥ 1.0 generally has const-correct free() (accepts const*) +using git_tree_entry_ptr = const git_tree_entry*; +#else +using git_tree_entry_ptr = git_tree_entry*; +#endif + +// Single guard that owns all temporaries used in resumeLfsDownloadForFile +struct GitScope { + git_object* tree_obj = nullptr; // owns the tree as a generic git_object + git_tree_entry_ptr entry = nullptr; // owns the entry + git_blob* blob = nullptr; // owns the blob + git_buf out = GIT_BUF_INIT; // owns the buffer + + GitScope() = default; + ~GitScope() { cleanup(); } + + GitScope(const GitScope&) = delete; + GitScope& operator=(const GitScope&) = delete; + + GitScope(GitScope&& other) noexcept : + tree_obj(other.tree_obj), + entry(other.entry), + blob(other.blob), + out(other.out) { + other.tree_obj = nullptr; + other.entry = nullptr; + other.blob = nullptr; + other.out = GIT_BUF_INIT; + } + GitScope& operator=(GitScope&& other) noexcept { + if (this != &other) { + cleanup(); + tree_obj = other.tree_obj; + entry = other.entry; + blob = other.blob; + out = other.out; + other.tree_obj = nullptr; + other.entry = nullptr; + other.blob = nullptr; + other.out = GIT_BUF_INIT; + } + return *this; + } + + git_tree* tree() const { return reinterpret_cast(tree_obj); } + +private: + void cleanup() noexcept { + git_buf_dispose(&out); + if (blob) { + git_blob_free(blob); + blob = nullptr; + } + if (entry) { + git_tree_entry_free(entry); + entry = nullptr; + } + if (tree_obj) { + git_object_free(tree_obj); + tree_obj = nullptr; + } + } +}; + +void resumeLfsDownloadForFile(git_repository* repo, const char* filePathInRepo) { + GitScope g; + + // Resolve HEAD tree (origin/main^{tree}) + CHECK(git_revparse_single(&g.tree_obj, repo, "origin/main^{tree}")); + + // Find the tree entry by path + CHECK(git_tree_entry_bypath(&g.entry, g.tree(), filePathInRepo)); + + // Ensure it's a blob + if (git_tree_entry_type(g.entry) != GIT_OBJECT_BLOB) { + fprintf(stderr, "[ERROR] Path is not a blob: %s\n", filePathInRepo); + return; // Guard cleans up + } + + // Lookup the blob + CHECK(git_blob_lookup(&g.blob, repo, git_tree_entry_id(g.entry))); + + // Configure filter behavior + git_blob_filter_options opts = GIT_BLOB_FILTER_OPTIONS_INIT; + // Choose direction: + // GIT_BLOB_FILTER_TO_WORKTREE : apply smudge (as if writing to working tree) + // GIT_BLOB_FILTER_TO_ODB : apply clean (as if writing to ODB) + // opts.flags = GIT_BLOB_FILTER_TO_WORKTREE; + + // Apply filters based on .gitattributes for this path (triggers LFS smudge/clean) + CHECK(git_blob_filter(&g.out, g.blob, filePathInRepo, &opts)); + + // We don't need the buffer contents; the filter side-effects are enough. + // All resources (out, blob, entry, tree_obj) will be freed automatically here. +} + Status HfDownloader::downloadModel() { if (FileSystem::isPathEscaped(this->downloadPath)) { SPDLOG_ERROR("Path {} escape with .. is forbidden.", this->downloadPath); @@ -187,8 +560,53 @@ Status HfDownloader::downloadModel() { // Repository exists and we do not want to overwrite if (std::filesystem::is_directory(this->downloadPath) && !this->overwriteModels) { - std::cout << "Path already exists on local filesystem. Skipping download to path: " << this->downloadPath << std::endl; - return StatusCode::OK; + // Checking if the download was partially finished for any files in repository + auto matches = findLfsLikeFiles(this->downloadPath, true); + + if (matches.empty()) { + std::cout << "No files to resume download found.\n"; + std::cout << "Path already exists on local filesystem. Skipping download to path: " << this->downloadPath << std::endl; + return StatusCode::OK; + } else { + std::cout << "Found " << matches.size() << " file(s) to resume partial download:\n"; + for (const auto& p : matches) { + std::cout << " " << p.string() << "\n"; + } + } + + GitRepositoryGuard repoGuard(this->downloadPath); + if (!repoGuard.get()) { + std::cout << "Path already exists on local filesystem. And is not a git repository: " << this->downloadPath << std::endl; + if (repoGuard.git_error_class == 2) + return StatusCode::HF_GIT_STATUS_FAILED_TO_RESOLVE_PATH; + else if (repoGuard.git_error_class == 3) + return StatusCode::HF_GIT_LIGIT2_NOT_INITIALIZED; + else + return StatusCode::HF_GIT_STATUS_FAILED; + } + + // Set repository url + std::string passRepoUrl = GetRepositoryUrlWithPassword(); + const char* url = passRepoUrl.c_str(); + int error = git_repository_set_url(repoGuard.get(), url); + if (error < 0) { + const git_error* err = git_error_last(); + if (err) + SPDLOG_ERROR("Repository set url failed: {} {}", err->klass, err->message); + else + SPDLOG_ERROR("Repository set url failed: {}", error); + std::cout << "Path already exists on local filesystem. And set git repository url failed: " << this->downloadPath << std::endl; + return StatusCode::HF_GIT_CLONE_FAILED; + } + + for (const auto& p : matches) { + std::cout << " Resuming " << p.string() << "...\n"; + std::string path = p.string(); + resumeLfsDownloadForFile(repoGuard.get(), path.c_str()); + } + + SPDLOG_DEBUG("Checking repository status."); + return CheckRepositoryStatus(false); } auto status = IModelDownloader::checkIfOverwriteAndRemove(); @@ -197,6 +615,7 @@ Status HfDownloader::downloadModel() { } SPDLOG_DEBUG("Downloading to path: {}", this->downloadPath); + git_repository* cloned_repo = NULL; // clone_opts for progress reporting set in libgit2 lib by patch git_clone_options clone_opts = GIT_CLONE_OPTIONS_INIT; @@ -225,12 +644,17 @@ Status HfDownloader::downloadModel() { SPDLOG_ERROR("Libgit2 clone error: {} message: {}", err->klass, err->message); else SPDLOG_ERROR("Libgit2 clone error: {}", error); - return StatusCode::HF_GIT_CLONE_FAILED; } else if (cloned_repo) { git_repository_free(cloned_repo); } + SPDLOG_DEBUG("Checking repository status."); + status = CheckRepositoryStatus(true); + if (!status.ok()) { + return status; + } + // libgit2 clone sets readonly attributes status = RemoveReadonlyFileAttributeFromDir(this->downloadPath); if (!status.ok()) { diff --git a/src/pull_module/libgit2.hpp b/src/pull_module/libgit2.hpp index 943f3cf725..f2da9f875f 100644 --- a/src/pull_module/libgit2.hpp +++ b/src/pull_module/libgit2.hpp @@ -15,8 +15,10 @@ // limitations under the License. //***************************************************************************** #pragma once -#include +#include #include +#include +#include #include #include @@ -31,6 +33,7 @@ namespace ovms { class Status; +namespace fs = std::filesystem; /* * libgit2 options. 0 is the default value @@ -62,5 +65,13 @@ class HfDownloader : public IModelDownloader { std::string GetRepositoryUrlWithPassword(); bool CheckIfProxySet(); Status RemoveReadonlyFileAttributeFromDir(const std::string& directoryPath); + Status CheckRepositoryStatus(bool checkUntracked); }; + +void rtrimCrLfWhitespace(std::string& s); +bool containsCaseInsensitive(const std::string& hay, const std::string& needle); +bool readFirstThreeLines(const fs::path& p, std::vector& outLines); +bool fileHasLfsKeywordsFirst3Positional(const fs::path& p); +fs::path makeRelativeToBase(const fs::path& path, const fs::path& base); +std::vector findLfsLikeFiles(const std::string& directory, bool recursive = true); } // namespace ovms diff --git a/src/status.cpp b/src/status.cpp index 97a92b9d30..0cc057042b 100644 --- a/src/status.cpp +++ b/src/status.cpp @@ -348,6 +348,10 @@ const std::unordered_map Status::statusMessageMap = { {StatusCode::HF_RUN_OPTIMUM_CLI_EXPORT_FAILED, "Failed to run optimum-cli export command"}, {StatusCode::HF_RUN_CONVERT_TOKENIZER_EXPORT_FAILED, "Failed to run convert-tokenizer export command"}, {StatusCode::HF_GIT_CLONE_FAILED, "Failed in libgit2 execution of clone method"}, + {StatusCode::HF_GIT_STATUS_FAILED, "Failed in libgit2 execution of status method"}, + {StatusCode::HF_GIT_STATUS_FAILED_TO_RESOLVE_PATH, "Failed in libgit2 to check repository status for a given path"}, + {StatusCode::HF_GIT_LIGIT2_NOT_INITIALIZED, "Libgit2 was not initialized"}, + {StatusCode::HF_GIT_STATUS_UNCLEAN, "Unclean status detected in libgit2 repository path"}, {StatusCode::PARTIAL_END, "Request has finished and no further communication is needed"}, {StatusCode::NONEXISTENT_PATH, "Nonexistent path"}, diff --git a/src/status.hpp b/src/status.hpp index 18a2b093b5..02b42886a5 100644 --- a/src/status.hpp +++ b/src/status.hpp @@ -360,6 +360,10 @@ enum class StatusCode { HF_RUN_OPTIMUM_CLI_EXPORT_FAILED, HF_RUN_CONVERT_TOKENIZER_EXPORT_FAILED, HF_GIT_CLONE_FAILED, + HF_GIT_STATUS_FAILED, + HF_GIT_STATUS_FAILED_TO_RESOLVE_PATH, + HF_GIT_LIGIT2_NOT_INITIALIZED, + HF_GIT_STATUS_UNCLEAN, PARTIAL_END, NONEXISTENT_PATH, diff --git a/src/test/libgit2_test.cpp b/src/test/libgit2_test.cpp new file mode 100644 index 0000000000..b221ee9c54 --- /dev/null +++ b/src/test/libgit2_test.cpp @@ -0,0 +1,772 @@ +//***************************************************************************** +// Copyright 2026 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +//***************************************************************************** +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "src/pull_module/libgit2.hpp" + +#include "environment.hpp" + +namespace fs = std::filesystem; + +TEST(LibGit2RtrimCrLfWhitespace, EmptyString) { + std::string s; + ovms::rtrimCrLfWhitespace(s); + EXPECT_TRUE(s.empty()); +} + +TEST(LibGit2RtrimCrLfWhitespace, NoWhitespace) { + std::string s = "abc"; + ovms::rtrimCrLfWhitespace(s); + EXPECT_EQ(s, "abc"); +} + +TEST(LibGit2RtrimCrLfWhitespace, OnlySpaces) { + std::string s = " "; + ovms::rtrimCrLfWhitespace(s); + EXPECT_EQ(s, ""); +} + +TEST(LibGit2RtrimCrLfWhitespace, LeadingSpacesOnly) { + std::string s = " abc"; + ovms::rtrimCrLfWhitespace(s); + EXPECT_EQ(s, "abc"); +} + +TEST(LibGit2RtrimCrLfWhitespace, TrailingSpacesOnly) { + std::string s = "abc "; + ovms::rtrimCrLfWhitespace(s); + EXPECT_EQ(s, "abc"); +} + +TEST(LibGit2RtrimCrLfWhitespace, LeadingAndTrailingSpaces) { + std::string s = " abc "; + ovms::rtrimCrLfWhitespace(s); + EXPECT_EQ(s, "abc"); +} + +TEST(LibGit2RtrimCrLfWhitespace, TabsAndNewlinesAround) { + std::string s = "\t\n abc \n\t"; + ovms::rtrimCrLfWhitespace(s); + EXPECT_EQ(s, "abc"); +} + +TEST(LibGit2RtrimCrLfWhitespace, AllCWhitespaceAround) { + // Include space, tab, newline, vertical tab, form feed, carriage return + std::string s = " \t\n\v\f\rabc\r\f\v\n\t "; + ovms::rtrimCrLfWhitespace(s); + EXPECT_EQ(s, "abc"); +} + +TEST(LibGit2RtrimCrLfWhitespace, PreserveInternalSpaces) { + std::string s = " a b c "; + ovms::rtrimCrLfWhitespace(s); + EXPECT_EQ(s, "a b c"); +} + +TEST(LibGit2RtrimCrLfWhitespace, TrailingCRLF) { + // Windows-style line ending: "\r\n" + std::string s = "abc\r\n"; + ovms::rtrimCrLfWhitespace(s); + EXPECT_EQ(s, "abc"); +} + +TEST(LibGit2RtrimCrLfWhitespace, TrailingCROnly) { + std::string s = "abc\r"; + ovms::rtrimCrLfWhitespace(s); + EXPECT_EQ(s, "abc"); +} + +TEST(LibGit2RtrimCrLfWhitespace, TrailingLFOnly) { + std::string s = "abc\n"; + ovms::rtrimCrLfWhitespace(s); + EXPECT_EQ(s, "abc"); +} + +TEST(LibGit2RtrimCrLfWhitespace, MultipleTrailingCRs) { + // Only one trailing '\r' is specially removed first, but then trailing + // whitespace loop will remove any remaining CRs (since isspace('\r') == true). + std::string s = "abc\r\r\r"; + ovms::rtrimCrLfWhitespace(s); + EXPECT_EQ(s, "abc"); +} + +TEST(LibGit2RtrimCrLfWhitespace, LeadingCRLFAndSpaces) { + std::string s = "\r\n abc"; + ovms::rtrimCrLfWhitespace(s); + EXPECT_EQ(s, "abc"); +} + +TEST(LibGit2RtrimCrLfWhitespace, InternalCRLFShouldRemainIfNotLeadingOrTrailing) { + // Internal whitespace should be preserved + std::string s = "a\r\nb"; + ovms::rtrimCrLfWhitespace(s); + EXPECT_EQ(s, "a\r\nb"); +} + +TEST(LibGit2RtrimCrLfWhitespace, OnlyCRLFAndWhitespace) { + std::string s = "\r\n\t \r"; + ovms::rtrimCrLfWhitespace(s); + EXPECT_EQ(s, ""); +} + +TEST(LibGit2RtrimCrLfWhitespace, NonAsciiBytesAreNotTrimmedByIsspace) { + // 0xC2 0xA0 is UTF-8 for NO-BREAK SPACE; bytes individually are not ASCII spaces. + // isspace() on unsigned char typically returns false for these bytes in the "C" locale. + // So they should remain unless at edges and recognized by the current locale (usually not). + std::string s = "\xC2" + "\xA0" + "abc" + "\xC2" + "\xA0"; + ovms::rtrimCrLfWhitespace(s); + // Expect unchanged because these bytes are not recognized by std::isspace in C locale + EXPECT_EQ(s, "\xC2" + "\xA0" + "abc" + "\xC2" + "\xA0"); +} + +TEST(LibGit2RtrimCrLfWhitespace, Idempotent) { + std::string s = " abc \n"; + ovms::rtrimCrLfWhitespace(s); + auto once = s; + ovms::rtrimCrLfWhitespace(s); + EXPECT_EQ(s, once); +} + +TEST(LibGit2ContainsCaseInsensitiveTest, ExactMatch) { + EXPECT_TRUE(ovms::containsCaseInsensitive("hello", "hello")); +} + +TEST(LibGit2ContainsCaseInsensitiveTest, MixedCaseMatch) { + EXPECT_TRUE(ovms::containsCaseInsensitive("HeLLo WoRLD", "world")); + EXPECT_TRUE(ovms::containsCaseInsensitive("HeLLo WoRLD", "HELLO")); +} + +TEST(LibGit2ContainsCaseInsensitiveTest, NoMatch) { + EXPECT_FALSE(ovms::containsCaseInsensitive("abcdef", "gh")); +} + +TEST(LibGit2ContainsCaseInsensitiveTest, EmptyNeedleReturnsTrue) { + // Consistent with std::string::find("") → 0 + EXPECT_TRUE(ovms::containsCaseInsensitive("something", "")); +} + +TEST(LibGit2ContainsCaseInsensitiveTest, EmptyHaystackNonEmptyNeedleReturnsFalse) { + EXPECT_FALSE(ovms::containsCaseInsensitive("", "abc")); +} + +TEST(LibGit2ContainsCaseInsensitiveTest, BothEmptyReturnsTrue) { + EXPECT_TRUE(ovms::containsCaseInsensitive("", "")); +} + +TEST(LibGit2ContainsCaseInsensitiveTest, SubstringAtBeginning) { + EXPECT_TRUE(ovms::containsCaseInsensitive("HelloWorld", "hello")); +} + +TEST(LibGit2ContainsCaseInsensitiveTest, SubstringInMiddle) { + EXPECT_TRUE(ovms::containsCaseInsensitive("abcHELLOxyz", "hello")); +} + +TEST(LibGit2ContainsCaseInsensitiveTest, SubstringAtEnd) { + EXPECT_TRUE(ovms::containsCaseInsensitive("testCASE", "case")); +} + +TEST(LibGit2ContainsCaseInsensitiveTest, NoFalsePositives) { + EXPECT_FALSE(ovms::containsCaseInsensitive("aaaaa", "b")); +} + +TEST(LibGit2ContainsCaseInsensitiveTest, UnicodeCharactersSafeButNotSpecialHandled) { + // std::tolower only reliably handles unsigned char range. + // This ensures your implementation does not crash or behave strangely. + EXPECT_FALSE(ovms::containsCaseInsensitive("ĄĆĘŁ", "ę")); // depends on locale; ASCII-only expected false +} + +// A helper for writing test files. +static fs::path writeTempFile(const std::string& filename, + const std::string& content) { + fs::path p = fs::temp_directory_path() / filename; + std::ofstream out(p, std::ios::binary); + out << content; + return p; +} + +TEST(LibGit2ReadFirstThreeLinesTest, FileNotFoundReturnsFalse) { + std::vector lines; + fs::path p = fs::temp_directory_path() / "nonexistent_12345.txt"; + EXPECT_FALSE(ovms::readFirstThreeLines(p, lines)); + EXPECT_TRUE(lines.empty()); +} + +TEST(LibGit2ReadFirstThreeLinesTest, ReadsExactlyThreeLines) { + fs::path p = writeTempFile("three_lines.txt", + "line1\n" + "line2\n" + "line3\n" + "extra\n"); // should be ignored + + std::vector out; + EXPECT_TRUE(ovms::readFirstThreeLines(p, out)); + ASSERT_EQ(out.size(), 3u); + EXPECT_EQ(out[0], "line1"); + EXPECT_EQ(out[1], "line2"); + EXPECT_EQ(out[2], "line3"); +} + +TEST(LibGit2ReadFirstThreeLinesTest, ReadsFewerThanThreeLines) { + fs::path p = writeTempFile("two_lines.txt", + "alpha\n" + "beta\n"); + + std::vector out; + EXPECT_TRUE(ovms::readFirstThreeLines(p, out)); + ASSERT_EQ(out.size(), 2u); + EXPECT_EQ(out[0], "alpha"); + EXPECT_EQ(out[1], "beta"); +} + +TEST(LibGit2ReadFirstThreeLinesTest, ReadsOneLineOnly) { + fs::path p = writeTempFile("one_line.txt", "solo\n"); + + std::vector out; + EXPECT_TRUE(ovms::readFirstThreeLines(p, out)); + ASSERT_EQ(out.size(), 1u); + EXPECT_EQ(out[0], "solo"); +} + +TEST(LibGit2ReadFirstThreeLinesTest, EmptyFileProducesZeroLinesAndReturnsTrue) { + fs::path p = writeTempFile("empty.txt", ""); + + std::vector out; + EXPECT_TRUE(ovms::readFirstThreeLines(p, out)); + EXPECT_TRUE(out.empty()); +} + +TEST(LibGit2ReadFirstThreeLinesTest, CRLFIsTrimmedCorrectly) { + fs::path p = writeTempFile("crlf.txt", + "hello\r\n" + "world\r\n"); + + std::vector out; + EXPECT_TRUE(ovms::readFirstThreeLines(p, out)); + ASSERT_EQ(out.size(), 2u); + EXPECT_EQ(out[0], "hello"); + EXPECT_EQ(out[1], "world"); +} + +TEST(LibGit2ReadFirstThreeLinesTest, LoneCRAndLFAreTrimmed) { + fs::path p = writeTempFile("mixed_newlines.txt", + "a\r" + "b\n" + "c\r\n"); + + std::vector out; + EXPECT_TRUE(ovms::readFirstThreeLines(p, out)); + + ASSERT_EQ(out.size(), 3u); + EXPECT_EQ(out[0], "a"); + EXPECT_EQ(out[1], "b"); + EXPECT_EQ(out[2], "c"); +} + +TEST(LibGit2ReadFirstThreeLinesTest, HandlesEOFWithoutNewlineAtEnd) { + fs::path p = writeTempFile("eof_no_newline.txt", + "first\n" + "second\n" + "third_without_newline"); + + std::vector out; + EXPECT_TRUE(ovms::readFirstThreeLines(p, out)); + + ASSERT_EQ(out.size(), 3u); + EXPECT_EQ(out[0], "first"); + EXPECT_EQ(out[1], "second"); + EXPECT_EQ(out[2], "third_without_newline"); +} + +TEST(LibGit2ReadFirstThreeLinesTest, TrailingWhitespaceNotPreserved) { + fs::path p = writeTempFile("spaces.txt", + "abc \n" + "def\t\t\n"); + + std::vector out; + EXPECT_TRUE(ovms::readFirstThreeLines(p, out)); + + ASSERT_EQ(out.size(), 2u); + EXPECT_EQ(out[0], "abc"); // spaces preserved + EXPECT_EQ(out[1], "def"); // tabs preserved +} + +// Optional: If you need to call readFirstThreeLines in any test-specific checks, +// declare it too (remove if unused here). +// bool readFirstThreeLines(const fs::path& p, std::vector& out); + +// ---- Test Utilities ---- + +// Create a unique temporary directory inside the system temp directory. +static fs::path createTempDir() { + const fs::path base = fs::temp_directory_path(); + std::random_device rd; + std::mt19937_64 gen(rd()); + std::uniform_int_distribution dist; + + // Try a reasonable number of times to avoid rare collisions + for (int attempt = 0; attempt < 100; ++attempt) { + auto candidate = base / ("lfs_kw_tests_" + std::to_string(dist(gen))); + std::error_code ec; + if (fs::create_directory(candidate, ec)) { + return candidate; + } + // If creation failed due to existing path, loop and try another name + // Otherwise (e.g., permissions), fall through and try again up to limit + } + + throw std::runtime_error("Failed to create a unique temporary directory"); +} + +static fs::path writeFile(const fs::path& dir, const std::string& name, const std::string& content) { + fs::path p = dir / name; + std::ofstream out(p, std::ios::binary); + if (!out) + throw std::runtime_error("Failed to create file: " + p.string()); + out.write(content.data(), static_cast(content.size())); + return p; +} + +// A simple RAII for a temp directory +struct TempDir { + fs::path dir; + TempDir() : + dir(createTempDir()) { + if (dir.empty()) + throw std::runtime_error("Failed to create temp directory"); + } + ~TempDir() { + std::error_code ec; + fs::remove_all(dir, ec); + } +}; + +class LibGit2FileHasLfsKeywordsFirst3PositionalTest : public ::testing::Test { +protected: + TempDir td; +}; + +// ---- Tests ---- + +TEST_F(LibGit2FileHasLfsKeywordsFirst3PositionalTest, ReturnsFalseForNonExistingFile) { + fs::path p = td.dir / "does_not_exist.txt"; + EXPECT_FALSE(ovms::fileHasLfsKeywordsFirst3Positional(p)); +} + +TEST_F(LibGit2FileHasLfsKeywordsFirst3PositionalTest, ReturnsFalseForDirectoryPath) { + // Passing the directory itself (not a regular file) + EXPECT_FALSE(ovms::fileHasLfsKeywordsFirst3Positional(td.dir)); +} + +TEST_F(LibGit2FileHasLfsKeywordsFirst3PositionalTest, ReturnsFalseForEmptyFile) { + auto p = writeFile(td.dir, "empty.txt", ""); + EXPECT_FALSE(ovms::fileHasLfsKeywordsFirst3Positional(p)); +} + +TEST_F(LibGit2FileHasLfsKeywordsFirst3PositionalTest, ReturnsFalseForLessThanThreeLines) { + { + auto p = writeFile(td.dir, "one_line.txt", "version something\n"); + EXPECT_FALSE(ovms::fileHasLfsKeywordsFirst3Positional(p)); + } + { + auto p = writeFile(td.dir, "two_lines.txt", "version x\n" + "oid y\n"); + EXPECT_FALSE(ovms::fileHasLfsKeywordsFirst3Positional(p)); + } +} + +TEST_F(LibGit2FileHasLfsKeywordsFirst3PositionalTest, HappyPathCaseInsensitiveAndExtraContent) { + // Lines contain the keywords somewhere (case-insensitive), extra content is okay. + const std::string content = + " VeRsIoN https://git-lfs.github.com/spec/v1 \n" + "\toid Sha256:abcdef1234567890\n" + "size 999999 \t \n"; + auto p = writeFile(td.dir, "ok.txt", content); + EXPECT_TRUE(ovms::fileHasLfsKeywordsFirst3Positional(p)); +} + +TEST_F(LibGit2FileHasLfsKeywordsFirst3PositionalTest, WrongOrderShouldFail) { + // Put keywords in wrong lines + const std::string content = + "size 100\n" + "version something\n" + "oid abc\n"; + auto p = writeFile(td.dir, "wrong_order.txt", content); + EXPECT_FALSE(ovms::fileHasLfsKeywordsFirst3Positional(p)); +} + +TEST_F(LibGit2FileHasLfsKeywordsFirst3PositionalTest, MissingKeywordShouldFail) { + // Line1 has version, line2 missing oid, line3 has size + const std::string content = + "version v1\n" + "hash sha256:abc\n" + "size 42\n"; + auto p = writeFile(td.dir, "missing_keyword.txt", content); + EXPECT_FALSE(ovms::fileHasLfsKeywordsFirst3Positional(p)); +} + +TEST_F(LibGit2FileHasLfsKeywordsFirst3PositionalTest, MixedNewlines_CR_LF_CRLF_ShouldPass) { + // Requires readFirstThreeLines to treat \r, \n, and \r\n as line breaks. + const std::string content = + "version one\r" + "oid two\n" + "size three\r\n"; + auto p = writeFile(td.dir, "mixed_newlines.txt", content); + EXPECT_TRUE(ovms::fileHasLfsKeywordsFirst3Positional(p)); +} + +TEST_F(LibGit2FileHasLfsKeywordsFirst3PositionalTest, LeadingAndTrailingWhitespaceDoesNotBreak) { + // Assuming readFirstThreeLines trims edge whitespace; otherwise contains() still works + const std::string content = + " version \n" + "\t oid\t\n" + " size \t\n"; + auto p = writeFile(td.dir, "whitespace.txt", content); + EXPECT_TRUE(ovms::fileHasLfsKeywordsFirst3Positional(p)); +} + +TEST_F(LibGit2FileHasLfsKeywordsFirst3PositionalTest, KeywordsMayAppearWithinLongerTextOnEachLine) { + const std::string content = + "prefix-version-suffix\n" + "some_oid_here\n" + "the_size_is_here\n"; + auto p = writeFile(td.dir, "contains_substrings.txt", content); + EXPECT_TRUE(ovms::fileHasLfsKeywordsFirst3Positional(p)); +} + +TEST_F(LibGit2FileHasLfsKeywordsFirst3PositionalTest, CaseInsensitiveCheck) { + const std::string content = + "VerSiOn 1\n" + "OID something\n" + "SiZe 123\n"; + auto p = writeFile(td.dir, "case_insensitive.txt", content); + EXPECT_TRUE(ovms::fileHasLfsKeywordsFirst3Positional(p)); +} + +TEST_F(LibGit2FileHasLfsKeywordsFirst3PositionalTest, ExtraLinesAfterFirstThreeDoNotMatter) { + const std::string content = + "version v1\n" + "oid abc\n" + "size 42\n" + "EXTRA LINE THAT SHOULD NOT AFFECT RESULT\n"; + auto p = writeFile(td.dir, "extra_lines.txt", content); + EXPECT_TRUE(ovms::fileHasLfsKeywordsFirst3Positional(p)); +} + +class LibGit2MakeRelativeToBaseTest : public ::testing::Test { +protected: + TempDir td; +}; + +// Base is an ancestor of path → should return the relative tail. +TEST_F(LibGit2MakeRelativeToBaseTest, BaseIsAncestor) { + fs::path base = td.dir / "root"; + fs::path sub = base / "a" / "b" / "file.txt"; + + std::error_code ec; + fs::create_directories(sub.parent_path(), ec); + + fs::path rel = ovms::makeRelativeToBase(sub, base); + // Expected: "a/b/file.txt" (platform-correct separators) + EXPECT_EQ(rel, fs::path("a") / "b" / "file.txt"); +} + +// Path equals base → fs::relative returns "." (non-empty), we keep it. +TEST_F(LibGit2MakeRelativeToBaseTest, PathEqualsBase) { + fs::path base = td.dir / "same"; + std::error_code ec; + fs::create_directories(base, ec); + + fs::path rel = ovms::makeRelativeToBase(base, base); + EXPECT_EQ(rel, fs::path(".")); +} + +// Sibling subtree: base is ancestor of both; result is still relative path from base. +TEST_F(LibGit2MakeRelativeToBaseTest, SiblingSubtree) { + fs::path base = td.dir / "root2"; + fs::path a = base / "a" / "deep" / "fileA.txt"; + fs::path b = base / "b"; + + std::error_code ec; + fs::create_directories(a.parent_path(), ec); + fs::create_directories(b, ec); + + fs::path rel = ovms::makeRelativeToBase(a, base); + EXPECT_EQ(rel, fs::path("a") / "deep" / "fileA.txt"); +} + +// Base is not an ancestor but on same root → return a proper upward relative like "../x/y". +TEST_F(LibGit2MakeRelativeToBaseTest, BaseIsNotAncestorButSameRoot) { + fs::path base = td.dir / "top" / "left"; + fs::path path = td.dir / "top" / "right" / "x" / "y.txt"; + + std::error_code ec; + fs::create_directories(base, ec); + fs::create_directories(path.parent_path(), ec); + + fs::path rel = ovms::makeRelativeToBase(path, base); + // From .../top/left to .../top/right/x/y.txt → "../right/x/y.txt" + EXPECT_EQ(rel, fs::path("..") / "right" / "x" / "y.txt"); +} + +// Works even if paths do not exist (lexical computation should still yield a sensible result) +TEST_F(LibGit2MakeRelativeToBaseTest, NonExistingPathsLexicalStillWorks) { + fs::path base = td.dir / "ghost" / "base"; + fs::path path = td.dir / "ghost" / "base" / "sub" / "file.dat"; + // No directories created + + fs::path rel = ovms::makeRelativeToBase(path, base); + EXPECT_EQ(rel, fs::path("sub") / "file.dat"); +} + +// Last resort on Windows: different drive letters → fs::relative fails, +// lexically_relative returns empty → function should return filename only. +#ifdef _WIN32 +TEST_F(LibGit2MakeRelativeToBaseTest, DifferentDrivesReturnsFilenameOnly) { + // NOTE: We don't touch the filesystem; we only test the path logic. + // Choose typical drive letters; test won't fail if the drive doesn't exist + // because we don't access the filesystem in lexically_relative path. + fs::path path = fs::path("D:\\folder\\file.txt"); + fs::path base = fs::path("C:\\another\\base"); + + fs::path rel = ovms::makeRelativeToBase(path, base); + EXPECT_EQ(rel, fs::path("file.txt")); +} +#endif + +// If path has no filename (e.g., it's a root), last resort returns path itself. +// On POSIX, "/" has no filename; on Windows, "C:\\" has no filename either. +TEST_F(LibGit2MakeRelativeToBaseTest, NoFilenameEdgeCaseReturnsPathItself) { + fs::path base = td.dir; // arbitrary +#if defined(_WIN32) + // Construct a path that has no filename: root-name + root-directory + // We can't know the system drive at compile time; use a generic root directory. + // For the test, we simulate a root-only path lexically. + fs::path path = fs::path("C:\\"); // has no filename +#else + fs::path path = fs::path("../.."); // has no filename +#endif + + fs::path rel = ovms::makeRelativeToBase(path, base); + EXPECT_EQ(rel, path); +} + +static void mkdirs(const fs::path& p) { + std::error_code ec; + fs::create_directories(p, ec); +} + +class LibGit2FindLfsLikeFilesTest : public ::testing::Test { +protected: + TempDir td; + + // Utility: sort paths lexicographically for deterministic comparison + static void sortPaths(std::vector& v) { + std::sort(v.begin(), v.end(), [](const fs::path& a, const fs::path& b) { + return a.generic_string() < b.generic_string(); + }); + } +}; + +// --- Tests --- + +TEST_F(LibGit2FindLfsLikeFilesTest, NonExistingDirectoryReturnsEmpty) { + fs::path nonexist = td.dir / "does_not_exist"; + auto matches = ovms::findLfsLikeFiles(nonexist.string(), /*recursive=*/true); + EXPECT_TRUE(matches.empty()); +} + +TEST_F(LibGit2FindLfsLikeFilesTest, EmptyDirectoryReturnsEmpty) { + auto matches = ovms::findLfsLikeFiles(td.dir.string(), /*recursive=*/true); + EXPECT_TRUE(matches.empty()); +} + +TEST_F(LibGit2FindLfsLikeFilesTest, NonRecursiveFindsOnlyTopLevelMatches) { + // Layout: + // td.dir/ + // match_top.txt (should match) + // nomatch_top.txt (should not match) + // sub/ + // match_nested.txt (should match but NOT included in non-recursive) + // Matching condition: lines[0] contains "version", lines[1] contains "oid", lines[2] contains "size" + + // Create top-level files + writeFile(td.dir, "match_top.txt", + "version v1\n" + "oid sha256:abc\n" + "size 123\n"); + + writeFile(td.dir, "nomatch_top.txt", + "version v1\n" + "hash something\n" // missing "oid" on line 2 + "size 123\n"); + + // Create nested directory and file + fs::path sub = td.dir / "sub"; + mkdirs(sub); + writeFile(sub, "match_nested.txt", + " VERSION v1 \n" + "\toid: 123\n" + "size: 42\n"); + + auto matches = ovms::findLfsLikeFiles(td.dir.string(), /*recursive=*/false); + sortPaths(matches); + + std::vector expected = {fs::path("match_top.txt")}; + sortPaths(expected); + + EXPECT_EQ(matches, expected); +} + +TEST_F(LibGit2FindLfsLikeFilesTest, RecursiveFindsNestedMatches) { + // Same layout as previous test but recursive = true; should include nested match as relative path + writeFile(td.dir, "top_match.txt", + "version spec\n" + "oid hash\n" + "size 1\n"); + + fs::path sub = td.dir / "a" / "b"; + mkdirs(sub); + writeFile(sub, "nested_match.txt", + "VeRsIoN\n" + "OID x\n" + "SiZe y\n"); + + // Add a deeper non-match to ensure it is ignored + fs::path deeper = td.dir / "a" / "b" / "c"; + mkdirs(deeper); + writeFile(deeper, "deep_nomatch.txt", + "hello\n" + "world\n" + "!\n"); + + auto matches = ovms::findLfsLikeFiles(td.dir.string(), /*recursive=*/true); + sortPaths(matches); + + std::vector expected = { + fs::path("top_match.txt"), + fs::path("a") / "b" / "nested_match.txt"}; + sortPaths(expected); + + EXPECT_EQ(matches, expected); +} + +TEST_F(LibGit2FindLfsLikeFilesTest, MixedNewlinesInMatchingFilesAreHandled) { + // Requires underlying readFirstThreeLines + fileHasLfsKeywordsFirst3Positional to handle \r, \n, \r\n + writeFile(td.dir, "mixed1.txt", + "version one\r" + "oid two\n" + "size three\r\n"); + + auto matches = ovms::findLfsLikeFiles(td.dir.string(), /*recursive=*/false); + + ASSERT_EQ(matches.size(), 1u); + EXPECT_EQ(matches[0], fs::path("mixed1.txt")); +} + +TEST_F(LibGit2FindLfsLikeFilesTest, WrongOrderOrMissingKeywordsAreNotIncluded) { + writeFile(td.dir, "wrong_order.txt", + "size 1\n" + "version 2\n" + "oid 3\n"); // wrong order → should not match + + writeFile(td.dir, "missing_second.txt", + "version v1\n" + "hash something\n" // missing "oid" + "size 3\n"); + + auto matches = ovms::findLfsLikeFiles(td.dir.string(), /*recursive=*/false); + EXPECT_TRUE(matches.empty()); +} + +TEST_F(LibGit2FindLfsLikeFilesTest, OnlyRegularFilesConsidered) { + // Create a directory with LFS-like name to ensure it isn't treated as a file + fs::path lfsdir = td.dir / "version_oid_size_dir"; + mkdirs(lfsdir); + + // No files → nothing should match + auto matches = ovms::findLfsLikeFiles(td.dir.string(), /*recursive=*/true); + EXPECT_TRUE(matches.empty()); +} + +TEST_F(LibGit2FindLfsLikeFilesTest, ReturnsPathsRelativeToBaseDirectory) { + // Ensure results are made relative to the provided base dir. + writeFile(td.dir, "root_match.txt", + "version v\n" + "oid o\n" + "size s\n"); + fs::path sub = td.dir / "x" / "y"; + mkdirs(sub); + writeFile(sub, "nested_match.txt", + "version v\n" + "oid o\n" + "size s\n"); + + auto matches = ovms::findLfsLikeFiles(td.dir.string(), /*recursive=*/true); + sortPaths(matches); + + std::vector expected = { + fs::path("root_match.txt"), + fs::path("x") / "y" / "nested_match.txt"}; + sortPaths(expected); + + EXPECT_EQ(matches, expected); +} + +TEST_F(LibGit2FindLfsLikeFilesTest, NonRecursiveDoesNotDescendButStillUsesRelativePaths) { + fs::path sub = td.dir / "subdir"; + mkdirs(sub); + + writeFile(td.dir, "toplevel.txt", + "version a\n" + "oid b\n" + "size c\n"); + + writeFile(sub, "nested.txt", + "version a\n" + "oid b\n" + "size c\n"); + + auto matches_nonrec = ovms::findLfsLikeFiles(td.dir.string(), /*recursive=*/false); + auto matches_rec = ovms::findLfsLikeFiles(td.dir.string(), /*recursive=*/true); + + // Non-recursive: only top-level + ASSERT_EQ(matches_nonrec.size(), 1u); + EXPECT_EQ(matches_nonrec[0], fs::path("toplevel.txt")); + + // Recursive: both, relative to base dir + sortPaths(matches_rec); + std::vector expected = { + fs::path("toplevel.txt"), + fs::path("subdir") / "nested.txt"}; + sortPaths(expected); + EXPECT_EQ(matches_rec, expected); +} diff --git a/src/test/pull_hf_model_test.cpp b/src/test/pull_hf_model_test.cpp index b29bbee326..5092f49dbf 100644 --- a/src/test/pull_hf_model_test.cpp +++ b/src/test/pull_hf_model_test.cpp @@ -14,6 +14,7 @@ // limitations under the License. //***************************************************************************** #include +#include #include #include @@ -39,6 +40,8 @@ #include "environment.hpp" +namespace fs = std::filesystem; + class HfDownloaderPullHfModel : public TestWithTempDir { protected: ovms::Server& server = ovms::Server::instance(); @@ -168,6 +171,161 @@ TEST_F(HfDownloaderPullHfModel, PositiveDownload) { ASSERT_EQ(expectedGraphContents, removeVersionString(graphContents)) << graphContents; } +// Truncate the file to half its size, keeping the first half. +bool removeSecondHalf(const std::string& filrStr) { + const fs::path& file(filrStr); + std::error_code ec; + ec.clear(); + + if (!fs::exists(file, ec) || !fs::is_regular_file(file, ec)) { + if (!ec) + ec = std::make_error_code(std::errc::no_such_file_or_directory); + return false; + } + + const std::uintmax_t size = fs::file_size(file, ec); + if (ec) + return false; + + const std::uintmax_t newSize = size / 2; // floor(size/2) + fs::resize_file(file, newSize, ec); + return !ec; +} + +bool createGitLfsPointerFile(const std::string& path) { + std::ofstream file(path, std::ios::binary); + if (!file.is_open()) { + return false; + } + + file << "version https://git-lfs.github.com/spec/v1\n" + "oid sha256:cecf0224201415144c00cf3a6cf3350306f9c78888d631eb590939a63722fefa\n" + "size 52417240\n"; + + return true; +} + +// Returns lowercase hex SHA-256 string on success, empty string on failure. +std::string sha256File(std::string_view path, std::error_code& ec) { + ec.clear(); + + std::ifstream ifs(std::string(path), std::ios::binary); + if (!ifs) { + ec = std::make_error_code(std::errc::no_such_file_or_directory); + return {}; + } + + SHA256_CTX ctx; + if (SHA256_Init(&ctx) != 1) { + ec = std::make_error_code(std::errc::io_error); + return {}; + } + + // Read in chunks to support large files without high memory usage. + std::vector buffer(1 << 20); // 1 MiB + while (ifs) { + ifs.read(reinterpret_cast(buffer.data()), static_cast(buffer.size())); + std::streamsize got = ifs.gcount(); + if (got > 0) { + if (SHA256_Update(&ctx, buffer.data(), static_cast(got)) != 1) { + ec = std::make_error_code(std::errc::io_error); + return {}; + } + } + } + if (!ifs.eof()) { // read failed not due to EOF + ec = std::make_error_code(std::errc::io_error); + return {}; + } + + std::array digest{}; + if (SHA256_Final(digest.data(), &ctx) != 1) { + ec = std::make_error_code(std::errc::io_error); + return {}; + } + + // Convert to lowercase hex + std::ostringstream oss; + oss << std::hex << std::setfill('0') << std::nouppercase; + for (unsigned char b : digest) { + oss << std::setw(2) << static_cast(b); + } + return oss.str(); +} + +class TestHfDownloader : public ovms::HfDownloader { +public: + TestHfDownloader(const std::string& sourceModel, const std::string& downloadPath, const std::string& hfEndpoint, const std::string& hfToken, const std::string& httpProxy, bool overwrite) : + HfDownloader(sourceModel, downloadPath, hfEndpoint, hfToken, httpProxy, overwrite) {} + std::string GetRepoUrl() { return HfDownloader::GetRepoUrl(); } + std::string GetRepositoryUrlWithPassword() { return HfDownloader::GetRepositoryUrlWithPassword(); } + bool CheckIfProxySet() { return HfDownloader::CheckIfProxySet(); } + const std::string& getEndpoint() { return this->hfEndpoint; } + const std::string& getProxy() { return this->httpProxy; } + std::string getGraphDirectory(const std::string& downloadPath, const std::string& sourceModel) { return IModelDownloader::getGraphDirectory(downloadPath, sourceModel); } + std::string getGraphDirectory() { return HfDownloader::getGraphDirectory(); } + ovms::Status CheckRepositoryStatus(bool checkUntracked) { return HfDownloader::CheckRepositoryStatus(checkUntracked); } +}; + +TEST_F(HfDownloaderPullHfModel, Resume) { + std::string modelName = "OpenVINO/Phi-3-mini-FastDraft-50M-int8-ov"; + std::string downloadPath = ovms::FileSystem::joinPath({this->directoryPath, "repository"}); + std::string task = "text_generation"; + this->ServerPullHfModel(modelName, downloadPath, task); + server.setShutdownRequest(1); + if (t) + t->join(); + server.setShutdownRequest(0); + + std::string ovModelName = "openvino_model.bin"; + std::string basePath = ovms::FileSystem::joinPath({this->directoryPath, "repository", "OpenVINO", "Phi-3-mini-FastDraft-50M-int8-ov"}); + std::string modelPath = ovms::FileSystem::appendSlash(basePath) + ovModelName; + std::string graphPath = ovms::FileSystem::appendSlash(basePath) + "graph.pbtxt"; + + ASSERT_EQ(std::filesystem::exists(modelPath), true) << modelPath; + ASSERT_EQ(std::filesystem::exists(graphPath), true) << graphPath; + ASSERT_EQ(std::filesystem::file_size(modelPath), 52417240); + std::string graphContents = GetFileContents(graphPath); + + ASSERT_EQ(expectedGraphContents, removeVersionString(graphContents)) << graphContents; + + // Check status function + std::unique_ptr hfDownloader = std::make_unique(modelName, ovms::IModelDownloader::getGraphDirectory(downloadPath, modelName), "", "", "", false); + + // Fails because we want clean and it has the graph.pbtxt after download + ASSERT_EQ(hfDownloader->CheckRepositoryStatus(true).getCode(), ovms::StatusCode::HF_GIT_STATUS_UNCLEAN); + + std::error_code ec; + ec.clear(); + std::string expectedDigest = sha256File(modelPath, ec); + ASSERT_EQ(ec, std::errc()); + // Prepare a git repository with a lfs_part file and lfs pointer file to simulate partial download error of a big model + ASSERT_EQ(removeSecondHalf(modelPath), true); + ASSERT_EQ(std::filesystem::file_size(modelPath), 26208620); + + std::string ovModelPartLfsName = "openvino_model.binlfs_part"; + std::string ovModelPartLfsPath = ovms::FileSystem::appendSlash(basePath) + ovModelPartLfsName; + fs::rename(modelPath, ovModelPartLfsPath, ec); + ASSERT_EQ(ec, std::errc()); + ASSERT_EQ(std::filesystem::file_size(ovModelPartLfsPath), 26208620); + ASSERT_EQ(createGitLfsPointerFile(modelPath), true); + + // Call ovms pull to resume the file + this->ServerPullHfModel(modelName, downloadPath, task); + + ASSERT_EQ(std::filesystem::exists(ovModelPartLfsPath), false) << modelPath; + ASSERT_EQ(std::filesystem::exists(modelPath), true) << modelPath; + ASSERT_EQ(std::filesystem::exists(graphPath), true) << graphPath; + ASSERT_EQ(std::filesystem::file_size(modelPath), 52417240); + graphContents = GetFileContents(graphPath); + + ASSERT_EQ(expectedGraphContents, removeVersionString(graphContents)) << graphContents; + + std::string resumedDigest = sha256File(modelPath, ec); + ASSERT_EQ(ec, std::errc()); + ASSERT_EQ(expectedDigest, resumedDigest); +} + TEST_F(HfDownloaderPullHfModel, PositiveDownloadAndStart) { SKIP_AND_EXIT_IF_NOT_RUNNING_UNSTABLE(); // CVS-180127 // EnvGuard guard; @@ -301,19 +459,6 @@ class TestOptimumDownloader : public ovms::OptimumDownloader { bool checkIfTokenizerFileIsExported() { return ovms::OptimumDownloader::checkIfTokenizerFileIsExported(); } }; -class TestHfDownloader : public ovms::HfDownloader { -public: - TestHfDownloader(const std::string& sourceModel, const std::string& downloadPath, const std::string& hfEndpoint, const std::string& hfToken, const std::string& httpProxy, bool overwrite) : - HfDownloader(sourceModel, downloadPath, hfEndpoint, hfToken, httpProxy, overwrite) {} - std::string GetRepoUrl() { return HfDownloader::GetRepoUrl(); } - std::string GetRepositoryUrlWithPassword() { return HfDownloader::GetRepositoryUrlWithPassword(); } - bool CheckIfProxySet() { return HfDownloader::CheckIfProxySet(); } - const std::string& getEndpoint() { return this->hfEndpoint; } - const std::string& getProxy() { return this->httpProxy; } - std::string getGraphDirectory(const std::string& downloadPath, const std::string& sourceModel) { return IModelDownloader::getGraphDirectory(downloadPath, sourceModel); } - std::string getGraphDirectory() { return HfDownloader::getGraphDirectory(); } -}; - TEST(HfDownloaderClassTest, Methods) { std::string modelName = "model/name"; std::string downloadPath = "/path/to/Download"; @@ -337,6 +482,32 @@ TEST(HfDownloaderClassTest, Methods) { ASSERT_EQ(hfDownloader->getGraphDirectory(), expectedPath); } +TEST(HfDownloaderClassTest, RepositoryStatusCheckErrors) { + std::string modelName = "model/name"; + std::string downloadPath = "/path/to/Download"; + std::string hfEndpoint = "www.new_hf.com/"; + std::string hfToken = "123$$o_O123!AAbb"; + std::string httpProxy = "https://proxy_test1:123"; + std::unique_ptr hfDownloader = std::make_unique(modelName, ovms::IModelDownloader::getGraphDirectory(downloadPath, modelName), hfEndpoint, hfToken, httpProxy, false); + + // Fails without libgit init + ASSERT_EQ(hfDownloader->CheckRepositoryStatus(true).getCode(), ovms::StatusCode::HF_GIT_LIGIT2_NOT_INITIALIZED); + ASSERT_EQ(hfDownloader->CheckRepositoryStatus(false).getCode(), ovms::StatusCode::HF_GIT_LIGIT2_NOT_INITIALIZED); + + auto guardOrError = ovms::createGuard(); + ASSERT_EQ(std::holds_alternative(guardOrError), false); + + // Path does not exist + ASSERT_EQ(hfDownloader->CheckRepositoryStatus(true).getCode(), ovms::StatusCode::HF_GIT_STATUS_FAILED_TO_RESOLVE_PATH); + ASSERT_EQ(hfDownloader->CheckRepositoryStatus(false).getCode(), ovms::StatusCode::HF_GIT_STATUS_FAILED_TO_RESOLVE_PATH); + + // Path not a git repository + downloadPath = getGenericFullPathForSrcTest("/tmp/"); + std::unique_ptr existingHfDownloader = std::make_unique(modelName, downloadPath, hfEndpoint, hfToken, httpProxy, false); + ASSERT_EQ(existingHfDownloader->CheckRepositoryStatus(true).getCode(), ovms::StatusCode::HF_GIT_STATUS_FAILED); + ASSERT_EQ(existingHfDownloader->CheckRepositoryStatus(false).getCode(), ovms::StatusCode::HF_GIT_STATUS_FAILED); +} + class TestOptimumDownloaderSetup : public ::testing::Test { public: ovms::HFSettingsImpl inHfSettings; diff --git a/third_party/libgit2/lfs.patch b/third_party/libgit2/lfs.patch index 6139df9e44..b6daae42ae 100644 --- a/third_party/libgit2/lfs.patch +++ b/third_party/libgit2/lfs.patch @@ -24,6 +24,56 @@ index 31da49a88..d61c9735e 100644 if(BUILD_EXAMPLES) add_subdirectory(examples) endif() +diff --git a/cmake/ExperimentalFeatures.cmake b/cmake/ExperimentalFeatures.cmake +index 7eff40bdb..5562acc77 100644 +--- a/cmake/ExperimentalFeatures.cmake ++++ b/cmake/ExperimentalFeatures.cmake +@@ -18,6 +18,3 @@ else() + add_feature_info("SHA256 API" OFF "experimental SHA256 APIs") + endif() + +-if(EXPERIMENTAL) +- set(LIBGIT2_FILENAME "${LIBGIT2_FILENAME}-experimental") +-endif() +diff --git a/include/git2/oid.h b/include/git2/oid.h +index 0af9737a0..6d9a8b08a 100644 +--- a/include/git2/oid.h ++++ b/include/git2/oid.h +@@ -22,14 +22,8 @@ GIT_BEGIN_DECL + + /** The type of object id. */ + typedef enum { +- +-#ifdef GIT_EXPERIMENTAL_SHA256 + GIT_OID_SHA1 = 1, /**< SHA1 */ + GIT_OID_SHA256 = 2 /**< SHA256 */ +-#else +- GIT_OID_SHA1 = 1 /**< SHA1 */ +-#endif +- + } git_oid_t; + + /* +diff --git a/include/git2/repository.h b/include/git2/repository.h +index b203576af..26309dd3f 100644 +--- a/include/git2/repository.h ++++ b/include/git2/repository.h +@@ -184,6 +184,15 @@ GIT_EXTERN(int) git_repository_open_ext( + unsigned int flags, + const char *ceiling_dirs); + ++/** ++ * Set repository url member ++ * ++ * ++ * @param repo repository handle to update. If NULL nothing occurs. ++ * @param url the remote repository to clone or run checkout against. ++ */ ++GIT_EXTERN(int) git_repository_set_url(git_repository *repo, const char *url); ++ + /** + * Open a bare repository on the serverside. + * diff --git a/include/git2/sys/filter.h b/include/git2/sys/filter.h index 60466d173..a35ad5f98 100644 --- a/include/git2/sys/filter.h @@ -61,7 +111,7 @@ index d121c588a..b54a01a4b 100644 # diff --git a/src/cli/cmd_clone.c b/src/cli/cmd_clone.c -index c18cb28d4..286fa7153 100644 +index c18cb28d4..6d23dcbb1 100644 --- a/src/cli/cmd_clone.c +++ b/src/cli/cmd_clone.c @@ -146,6 +146,7 @@ int cmd_clone(int argc, char **argv) @@ -72,6 +122,76 @@ index c18cb28d4..286fa7153 100644 if (!checkout) clone_opts.checkout_opts.checkout_strategy = GIT_CHECKOUT_NONE; +@@ -182,6 +183,69 @@ int cmd_clone(int argc, char **argv) + + cli_progress_finish(&progress); + ++ ++ git_repository *repo2 = NULL; ++ int error = git_repository_open_ext(&repo2, local_path, 0, NULL); ++ // HEAD state info ++ bool is_detached = git_repository_head_detached(repo2) == 1; ++ bool is_unborn = git_repository_head_unborn(repo2) == 1; ++ ++ // Collect status (staged/unstaged/untracked) ++ git_status_options opts = GIT_STATUS_OPTIONS_INIT; ++ ++ opts.show = GIT_STATUS_SHOW_INDEX_AND_WORKDIR; ++ opts.flags = GIT_STATUS_OPT_INCLUDE_UNTRACKED // include untracked files ++ // // | ++ // GIT_STATUS_OPT_RENAMES_HEAD_TO_INDEX ++ // // detect renames ++ // HEAD->index - not ++ // required currently and ++ // impacts performance ++ | GIT_STATUS_OPT_SORT_CASE_SENSITIVELY; ++ ++ git_status_list *status_list = NULL; ++ ret = git_status_list_new(&status_list, repo2, &opts); ++ ++ size_t staged = 0, unstaged = 0, untracked = 0, conflicted = 0; ++ const size_t n = git_status_list_entrycount(status_list); ++ ++ for (size_t i = 0; i < n; ++i) { ++ const git_status_entry *e = git_status_byindex(status_list, i); ++ if (!e) ++ continue; ++ unsigned s = e->status; ++ ++ // Staged (index) changes ++ if (s & (GIT_STATUS_INDEX_NEW | GIT_STATUS_INDEX_MODIFIED | ++ GIT_STATUS_INDEX_DELETED | GIT_STATUS_INDEX_RENAMED | ++ GIT_STATUS_INDEX_TYPECHANGE)) ++ ++staged; ++ ++ // Unstaged (workdir) changes ++ if (s & (GIT_STATUS_WT_MODIFIED | GIT_STATUS_WT_DELETED | ++ GIT_STATUS_WT_RENAMED | GIT_STATUS_WT_TYPECHANGE)) ++ ++unstaged; ++ ++ // Untracked ++ if (s & GIT_STATUS_WT_NEW) ++ ++untracked; ++ ++ // Conflicted ++ if (s & GIT_STATUS_CONFLICTED) ++ ++conflicted; ++ } ++ ++ // Print summary (mirrors your original stream output) ++ printf("HEAD state : %s\n", ++ is_unborn ? "unborn (no commits)" : ++ (is_detached ? "detached" : "attached")); ++ printf("Staged changes : %zu\n", staged); ++ printf("Unstaged changes: %zu\n", unstaged); ++ printf("Untracked files : %zu", untracked); ++ if (conflicted) { ++ printf(" (%zu paths flagged)", conflicted); ++ } ++ printf("\n"); + done: + cli_progress_dispose(&progress); + git__free(computed_path); diff --git a/src/cli/progress.h b/src/cli/progress.h index f08d68f19..0344304ec 100644 --- a/src/cli/progress.h @@ -312,11 +432,11 @@ index 58cb4b424..00ddee9f3 100644 git_writestream **out, diff --git a/src/libgit2/lfs_filter.c b/src/libgit2/lfs_filter.c new file mode 100644 -index 000000000..484811a0c +index 000000000..fc3120cca --- /dev/null +++ b/src/libgit2/lfs_filter.c -@@ -0,0 +1,567 @@ -+/* +@@ -0,0 +1,810 @@ ++/* +/ Copyright 2025 Intel Corporation +/ +/ Licensed under the Apache License, Version 2.0 (the "License"); @@ -337,6 +457,8 @@ index 000000000..484811a0c + +#include +#include "git2/sys/filter.h" ++#include "hash.h" ++#include "oid.h" +#include "filter.h" +#include "str.h" +#include "repository.h" @@ -351,6 +473,7 @@ index 000000000..484811a0c + const char *lfs_oid; + const char *lfs_size; + const char *url; ++ bool is_download; +} lfs_attrs; + +static size_t get_digit(const char *buffer) @@ -360,27 +483,49 @@ index 000000000..484811a0c + size_t number = strtoull(buffer, &endptr, 10); + + if (errno == ERANGE) { -+ fprintf(stderr, "Conversion error\n"); ++ fprintf(stderr, "\n[ERROR] Conversion error\n"); + } + if (endptr == buffer) { -+ fprintf(stderr, "No digits were found\n"); ++ fprintf(stderr, "\n[ERROR] No digits were found\n"); + } else if (*endptr != '\0') { -+ fprintf(stderr, "Additional characters after number: %s\n", endptr); ++ fprintf(stderr, "\n[ERROR] Additional characters after number: %s\n", endptr); + } + + return number; +} + -+char *append_char_to_buffer(char *existingBuffer, char additionalChar) ++/** ++ * Appends a C-string `suffix` to `existingBuffer` by allocating a new buffer. ++ * The original `existingBuffer` is not modified. ++ * ++ * Returns: ++ * - Newly allocated buffer containing the concatenation, or ++ * - NULL on allocation failure or if inputs are invalid. ++ * ++ * Note: Caller is responsible for freeing the returned buffer. ++ */ ++char *append_cstr_to_buffer(const char *existingBuffer, const char *suffix) +{ ++ if (existingBuffer == NULL || suffix == NULL) { ++ return NULL; ++ } ++ + size_t existingLength = strlen(existingBuffer); -+ char *newBuffer = (char *)malloc((existingLength + 2) * sizeof(char)); ++ size_t suffixLength = strlen(suffix); ++ ++ /* +1 for the null terminator */ ++ size_t newSize = existingLength + suffixLength + 1; ++ ++ char *newBuffer = (char *)malloc(newSize); + if (newBuffer == NULL) { + return NULL; + } -+ strcpy(newBuffer, existingBuffer); -+ newBuffer[existingLength] = additionalChar; -+ newBuffer[existingLength + 1] = '\0'; ++ ++ /* Copy existing and then append suffix */ ++ memcpy(newBuffer, existingBuffer, existingLength); ++ memcpy(newBuffer + existingLength, suffix, suffixLength); ++ newBuffer[newSize - 1] = '\0'; ++ + return newBuffer; +} + @@ -412,6 +557,142 @@ index 000000000..484811a0c + return -1; +} + ++void print_src_oid(const git_filter_source *src) ++{ ++ const git_oid *oid = git_filter_source_id(src); ++ ++ if (oid) { ++ printf("\nsrc->git_oid %s\n", git_oid_tostr_s(oid)); ++ } else { ++ printf("\nsrc has no OID (e.g., not a blob-backed source or unavailable)\n"); ++ } ++} ++ ++int git_oid_sha256_from_git_str_blob( ++ git_oid *out, ++ const struct git_str *input, ++ char *pointer_line, ++ size_t pointer_line_cap) ++{ ++ int error = -1; ++ git_hash_ctx ctx; ++ ++ if (!out || !input || !input->ptr) { ++ return -1; ++ } ++ ++ if (!pointer_line || ++ pointer_line_cap < (size_t)(strlen("oid sha256:") + 64 + 1)) { ++ return -1; ++ } ++ ++ /* 1) Init SHA-256 hashing context (internal API) */ ++ if (git_hash_ctx_init(&ctx, GIT_HASH_ALGORITHM_SHA256) < 0) { ++ fprintf(stderr, "\n[ERROR] git_hash_ctx_init failed\n"); ++ return -1; ++ } ++ ++ /* 2) Stream the payload in chunks — hash *only* the file bytes. */ ++ const size_t CHUNK = 4 * 1024 * 1024; /* 4 MiB */ ++ const unsigned char *p = (const unsigned char *)input->ptr; ++ size_t remaining = input->size; ++ ++ while (remaining > 0) { ++ size_t n = remaining > CHUNK ? CHUNK : remaining; ++ if (git_hash_update(&ctx, p, n) < 0) { ++ fprintf(stderr, "\n[ERROR] git_hash_update failed\n"); ++ return -1; ++ } ++ p += n; ++ remaining -= n; ++ } ++ ++ /* 3) Finalize into git_oid (32-byte raw digest for SHA-256). */ ++ if (git_hash_final(out->id, &ctx) < 0) { ++ fprintf(stderr, "\n[ERROR] git_hash_final failed\n"); ++ return -1; ++ } ++ ++ /* 4) Optionally format "oid sha256:" for the LFS pointer file. */ ++ if (pointer_line && ++ pointer_line_cap >= (size_t)(strlen("oid sha256:") + 64 + 1)) { ++ char hex[64 + 1]; ++ /* Formats full hex; no NUL added. */ ++ if (git_oid_fmt(hex, out) < 0) { ++ fprintf(stderr, ++ "\n[ERROR] failure, git_oid_fmt failed\n"); ++ return -1; ++ } ++ ++ hex[64] = '\0'; ++ snprintf(pointer_line, pointer_line_cap, "oid sha256:%s", hex); ++ } ++ ++ return 0; ++} ++ ++static int lfs_remove_id( ++ git_str *to, ++ const git_str *from, ++ void **payload) ++{ ++ int error = 0; ++ /* Init the lfs attrs to indicate git lfs clean, currently only diff support no upload of lfs file supported */ ++ struct lfs_attrs la = { NULL, NULL, NULL, NULL, NULL, NULL, false }; ++ *payload = git__malloc(sizeof(la)); ++ GIT_ERROR_CHECK_ALLOC(*payload); ++ memcpy(*payload, &la, sizeof(la)); ++ ++ if(!from) return -1; ++ ++ /* lfs spec - return empty pointer when the file is empty */ ++ if (from->size == 0) { ++ git_str_init(to, 0); ++ return 0; ++ } ++ ++ /* Use lib git oid to get lfs sha256 */ ++ git_oid lfs_oid; ++ lfs_oid.type = GIT_OID_SHA256; ++ char line[80]; /* 75+ is enough */ ++ if (git_oid_sha256_from_git_str_blob(&lfs_oid, from, line, sizeof(line)) < 0) { ++ fprintf(stderr, ++ "\n[ERROR] failure, cannot calculate sha256\n"); ++ return -1; ++ } ++ ++ git_str_init(to, 0); ++ ++ /* 1) version line (LFS spec requires this literal string) */ ++ if ((error = git_str_puts( ++ to, "version https://git-lfs.github.com/spec/v1\n")) < 0) { ++ fprintf(stderr, "\n[ERROR] git_str_puts failed\n"); ++ return error; ++ } ++ ++ ++ /* 2) the oid line passed by caller (must end with '\n') */ ++ if ((error = git_str_puts(to, line)) < 0) { ++ fprintf(stderr, "\n[ERROR] git_str_puts failed\n"); ++ return error; ++ } ++ ++ if (line[strlen(line) - 1] != '\n') { ++ if ((error = git_str_putc(to, '\n')) < 0) { ++ fprintf(stderr, "\n[ERROR] git_str_putc failed\n"); ++ return error; ++ } ++ } ++ ++ /* 3) size line from the original file size */ ++ if ((error = git_str_printf(to, "size %zu\n", from->size)) < 0) { ++ fprintf(stderr, "\n[ERROR] git_str_printf failed\n"); ++ return error; ++ } ++ ++ return 0; ++} ++ +static int lfs_insert_id( + git_str *to, const git_str *from, const git_filter_source *src, void** payload) +{ @@ -427,23 +708,35 @@ index 000000000..484811a0c + + const char *obj_regexp = "\noid sha256:(.*)\n"; + const char *size_regexp = "\nsize (.*)\n"; -+ if (get_lfs_info_match(&lfs_oid, obj_regexp) < 0) ++ ++ if (get_lfs_info_match(&lfs_oid, obj_regexp) < 0) { ++ fprintf(stderr,"\n[ERROR] failure, cannot find lfs oid in: %s\n", ++ lfs_oid.ptr); + return -1; ++ } + -+ if (get_lfs_info_match(&lfs_size, size_regexp) < 0) ++ if (get_lfs_info_match(&lfs_size, size_regexp) < 0) { ++ fprintf(stderr, ++ "\n[ERROR] failure, cannot find lfs size in: %s\n", ++ lfs_size.ptr); + return -1; ++ } + + git_repository *repo = git_filter_source_repo(src); + const char *path = git_filter_source_path(src); + + git_str full_path = GIT_STR_INIT; -+ if (git_repository_workdir_path(&full_path, repo, path) < 0) ++ if (git_repository_workdir_path(&full_path, repo, path) < 0) { ++ fprintf(stderr, ++ "\n[ERROR] failure, cannot get repository path: %s\n", ++ path); + return -1; ++ } + + size_t workdir_size = strlen(git_repository_workdir(repo)); + + const char *workdir = git_repository_workdir(repo); -+ struct lfs_attrs la = { path, full_path.ptr, workdir, lfs_oid.ptr, lfs_size.ptr, repo->url }; ++ struct lfs_attrs la = { path, full_path.ptr, workdir, lfs_oid.ptr, lfs_size.ptr, repo->url, true }; + + *payload = git__malloc(sizeof(la)); + GIT_ERROR_CHECK_ALLOC(*payload); @@ -462,12 +755,12 @@ index 000000000..484811a0c +{ + GIT_UNUSED(self); GIT_UNUSED(payload); + ++ /* for download of the lfs pointer files */ + if (git_filter_source_mode(src) == GIT_FILTER_SMUDGE) + return lfs_insert_id(to, from, src, payload); -+ /*else -+ * PATH for upload lfs files not needed -+ return lfs_remove_id(to, from); -+ */ ++ else ++ /* for upload or diff of the lfs pointer files */ ++ return lfs_remove_id(to, from, payload); + return 0; +} + @@ -623,7 +916,7 @@ index 000000000..484811a0c + /* open file for writing */ + out->stream = fopen(out->filename, "wb"); + if (!out->stream) { -+ fprintf(stderr, "failure, cannot open file to write: %s\n", ++ fprintf(stderr, "\n[ERROR] failure, cannot open file to write: %s\n", + out->filename); + return 0; /* failure, cannot open file to write */ + } @@ -668,6 +961,37 @@ index 000000000..484811a0c + status = setopt; \ + } + ++int get_curl_resume_url(CURL *dl_curl, struct FtpFile* ftpfile) ++{ ++ /* ++ curl_off_t resume_from = 0; ++ curl_easy_getinfo( ++ dl_curl, CURLINFO_CONTENT_LENGTH_DOWNLOAD_T, &resume_from); ++ ++ if (resume_from == -1) { ++ fprintf(stderr, ++ "\n[ERROR] curl_easy_perform() failed with transferred a partial file error and server does not support range/resume.\n"); ++ } else { ++ */ ++ printf("\n[INFO] curl_easy_perform() trying to resume file download\n"); ++ curl_off_t offset = 0; ++ if (ftpfile->stream) { ++ fseek(ftpfile->stream, 0, SEEK_END); ++ offset = ftell(ftpfile->stream); ++ } else { ++ ftpfile->stream = fopen(ftpfile->filename, "ab+"); ++ if (ftpfile->stream) { ++ fseek(ftpfile->stream, 0, SEEK_END); ++ offset = ftell(ftpfile->stream); ++ } ++ } ++ ++ /* Tell libcurl to resume */ ++ curl_easy_setopt(dl_curl, CURLOPT_RESUME_FROM_LARGE, offset); ++ /* Perform the request, res gets the return code */ ++ return curl_easy_perform(dl_curl); ++} ++ +/** + * lfs_download - Downloads a file using the LFS (Large File Storage) mechanism. + * @@ -690,11 +1014,18 @@ index 000000000..484811a0c +{ + GIT_UNUSED(self); + if (!payload) { -+ fprintf(stderr, "lfs payload not initialized"); ++ fprintf(stderr, "\n[ERROR] lfs payload not initialized\n"); + return; + } + struct lfs_attrs *la = (struct lfs_attrs *)payload; -+ char *tmp_out_file = append_char_to_buffer(la->full_path, '2'); ++ ++ /* Currently only download is supoprted, no lfs file upload */ ++ if (!la->is_download) { ++ git__free(payload); ++ return; ++ } ++ ++ char *tmp_out_file = append_cstr_to_buffer(la->full_path, "lfs_part"); + + CURL *info_curl,*dl_curl; + CURLcode res = CURLE_OK; @@ -709,7 +1040,7 @@ index 000000000..484811a0c + &lfs_info_url, '.', + la->url, + "git/info/lfs/objects/batch") < 0) { -+ fprintf(stderr, "failed to create url '%s'", ++ fprintf(stderr, "\n[ERROR] failed to create url '%s'\n", + la->full_path); + goto on_error; + } @@ -726,7 +1057,7 @@ index 000000000..484811a0c + CURL_SETOPT(curl_easy_setopt(info_curl, CURLOPT_URL, lfs_info_url.ptr)); + + if (status != CURLE_OK) { -+ fprintf(stderr, "curl_easy_setopt() failed: %s\n", curl_easy_strerror(status)); ++ fprintf(stderr, "\n[ERROR] curl_easy_setopt() failed: %s\n", curl_easy_strerror(status)); + goto info_cleaup; + } + git_str lfs_info_data = GIT_STR_INIT; @@ -739,7 +1070,7 @@ index 000000000..484811a0c + ",\"size\":", + la->lfs_size, + "}]}" ) < 0) { -+ fprintf(stderr, "failed to create url '%s'", ++ fprintf(stderr, "\n[ERROR] failed to create url '%s'\n", + la->full_path); + /* always cleanup */ + curl_easy_cleanup(info_curl); @@ -759,14 +1090,14 @@ index 000000000..484811a0c + CURL_SETOPT(curl_easy_setopt(info_curl, CURLOPT_WRITEDATA, (void *)&response)); + + if (status != CURLE_OK) { -+ fprintf(stderr, "curl_easy_setopt() failed: %s\n", curl_easy_strerror(status)); ++ fprintf(stderr, "\n[ERROR] curl_easy_setopt() failed: %s\n", curl_easy_strerror(status)); + goto info_cleaup; + } + /* Perform the request, res gets the return code */ + res = curl_easy_perform(info_curl); + /* Check for errors */ + if (res != CURLE_OK) { -+ fprintf(stderr, "curl_easy_perform() failed: %s\n", ++ fprintf(stderr, "\n[ERROR] curl_easy_perform() failed: %s\n", + curl_easy_strerror(res)); + /* always cleanup */ + curl_easy_cleanup(info_curl); @@ -789,6 +1120,7 @@ index 000000000..484811a0c + } + + /* get a curl handle */ ++ bool resumingFileByBlobFilter = false; + dl_curl = curl_easy_init(); + if (dl_curl) { + struct FtpFile ftpfile = { tmp_out_file, NULL }; @@ -817,46 +1149,77 @@ index 000000000..484811a0c + CURL_SETOPT(curl_easy_setopt(dl_curl, CURLOPT_XFERINFODATA, &progress_d)); + + if (status != CURLE_OK) { -+ fprintf(stderr, "curl_easy_setopt() failed: %s\n", curl_easy_strerror(status)); ++ fprintf(stderr, "\n[ERROR] curl_easy_setopt() failed: %s\n", curl_easy_strerror(status)); + curl_easy_cleanup(dl_curl); + goto on_error; + } -+ print_download_info(la->full_path, get_digit(la->lfs_size)); -+ /* Perform the request, res gets the return code */ -+ res = curl_easy_perform(dl_curl); ++ ++ /* Check for resume if previous download failed and we have the partial file on disk */ ++ ftpfile.stream = fopen(ftpfile.filename, "r"); ++ if (ftpfile.stream != NULL) ++ { ++ resumingFileByBlobFilter = true; ++ fclose(ftpfile.stream); ++ ftpfile.stream = NULL; ++ res = get_curl_resume_url(dl_curl, &ftpfile); ++ } else { ++ print_download_info( ++ la->full_path, get_digit(la->lfs_size)); ++ /* Perform the request, res gets the return code */ ++ res = curl_easy_perform(dl_curl); ++ } ++ ++ /* Check for resume of partial download error */ ++ if (res == CURLE_PARTIAL_FILE) { ++ res = get_curl_resume_url(dl_curl, &ftpfile); ++ } ++ + /* Check for errors */ + if (res != CURLE_OK) { -+ fprintf(stderr, "curl_easy_perform() failed: %s\n", ++ fprintf(stderr, "\n[ERROR] curl_easy_perform() failed: %s\n", + curl_easy_strerror(res)); -+ if (ftpfile.stream) ++ if (ftpfile.stream) { + fclose(ftpfile.stream); ++ ftpfile.stream = NULL; ++ } + /* always cleanup */ + curl_easy_cleanup(dl_curl); + goto on_error; + } + -+ if (ftpfile.stream) ++ if (ftpfile.stream) { + fclose(ftpfile.stream); ++ ftpfile.stream = NULL; ++ } + /* always cleanup */ + curl_easy_cleanup(dl_curl); + } + -+ /* Remove lfs file and rename downloaded file to oryginal lfs filename */ -+ if (p_unlink(la->full_path) < 0) { -+ fprintf(stderr, "failed to delete file '%s'", la->full_path); -+ goto on_error; ++ /* Remove lfs file and rename downloaded file to oryginal lfs filename */ ++ if (!resumingFileByBlobFilter) { ++ /* File does not exist when using blob filters */ ++ if (p_unlink(la->full_path) < 0) { ++ fprintf(stderr, ++ "\n[ERROR] failed to delete file '%s'\n", ++ la->full_path); ++ goto on_error; ++ } + } + + if (p_rename(tmp_out_file, la->full_path) < 0) { -+ fprintf(stderr, "failed to rename file to '%s'", la->full_path); ++ fprintf(stderr, "\n[ERROR] failed to rename file to '%s'\n", la->full_path); + goto on_error; + } ++ free(tmp_out_file); + git__free(payload); + return; + -+on_error: ++ on_error: ++ fprintf(stderr, "\n[ERROR] LFS download failed for file %s\n", ++ la->full_path); ++ fflush(stderr); ++ free(tmp_out_file); + git__free(payload); -+ fprintf(stderr, "LFS download failed for file %s\n", la->full_path); + return; +} + @@ -884,7 +1247,7 @@ index 000000000..484811a0c + return f; +} diff --git a/src/libgit2/repository.c b/src/libgit2/repository.c -index 73876424a..6c267bc98 100644 +index 73876424a..f374d7f51 100644 --- a/src/libgit2/repository.c +++ b/src/libgit2/repository.c @@ -190,6 +190,7 @@ void git_repository_free(git_repository *repo) @@ -895,6 +1258,23 @@ index 73876424a..6c267bc98 100644 git__memzero(repo, sizeof(*repo)); git__free(repo); +@@ -1104,6 +1105,16 @@ static int repo_is_worktree(unsigned *out, const git_repository *repo) + return error; + } + ++int git_repository_set_url( ++ git_repository *repo, ++ const char *url) ++{ ++ GIT_ASSERT_ARG(repo); ++ GIT_ASSERT_ARG(url); ++ repo->url = git__strdup(url); ++ return 0; ++} ++ + int git_repository_open_ext( + git_repository **repo_ptr, + const char *start_path, diff --git a/src/libgit2/repository.h b/src/libgit2/repository.h index fbf143894..1890c61c1 100644 --- a/src/libgit2/repository.h diff --git a/third_party/libgit2/libgit2_engine.bzl b/third_party/libgit2/libgit2_engine.bzl index 605eb0edca..b4713bce57 100644 --- a/third_party/libgit2/libgit2_engine.bzl +++ b/third_party/libgit2/libgit2_engine.bzl @@ -51,6 +51,7 @@ def _impl(repository_ctx): out_static = "out_interface_libs = [\"{lib_name}.lib\"],".format(lib_name=lib_name) out_libs = "out_shared_libs = [\"{lib_name}.dll\"],".format(lib_name=lib_name) cache_entries = """ + "EXPERIMENTAL_SHA256": "ON", "CMAKE_POSITION_INDEPENDENT_CODE": "ON", "CMAKE_CXX_FLAGS": " /guard:cf /GS -s -D_GLIBCXX_USE_CXX11_ABI=1", "CMAKE_LIBRARY_OUTPUT_DIRECTORY": "Debug", @@ -66,6 +67,7 @@ def _impl(repository_ctx): out_static = "" out_libs = "out_shared_libs = [\"{lib_name}.so\"],".format(lib_name=lib_name) cache_entries = """ + "EXPERIMENTAL_SHA256": "ON", "CMAKE_POSITION_INDEPENDENT_CODE": "ON", "CMAKE_CXX_FLAGS": " /guard:cf -s -D_GLIBCXX_USE_CXX11_ABI=1 -Wno-error=deprecated-declarations -Wuninitialized", "CMAKE_ARCHIVE_OUTPUT_DIRECTORY": "lib",