Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions src/hive_cli/platform/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from hive_cli.config import HiveConfig
from hive_cli.runtime.runtime import Runtime
from hive_cli.utils import git, image
from hive_cli.utils import time as utime
from hive_cli.utils.logger import logger


Expand Down Expand Up @@ -96,7 +97,7 @@ def prepare_images(self, config: HiveConfig, push: bool = False) -> str:
)

dest = Path(temp_repo_dir) / "repo"
hash = git.get_codebase(config.repo.source, str(dest), config.repo.branch)
git.get_codebase(config.repo.source, str(dest), config.repo.branch)
logger.debug(
f"Cloning repository {config.repo.source} to {dest}, the tree structure of the directory: {os.listdir('.')}, the tree structure of the {dest} directory: {os.listdir(dest)}"
)
Expand Down Expand Up @@ -144,8 +145,8 @@ def prepare_images(self, config: HiveConfig, push: bool = False) -> str:
"Unsupported cloud provider configuration. Please enable GCP or AWS."
)

# Use the git commit hash as the image tag to ensure uniqueness.
image_name = f"{image_registry}:{hash[:7]}"
tag = utime.now_us()
image_name = f"{image_registry}:{tag}"

logger.debug(
f"Building sandbox image {image_name} in {temp_sandbox_dir} with push={push}"
Expand Down
20 changes: 1 addition & 19 deletions src/hive_cli/utils/git.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,19 +4,16 @@

import git

from hive_cli.utils import time as utime
from hive_cli.utils.logger import logger


def get_codebase(source: str, dest: str, branch: str = "main") -> str:
def get_codebase(source: str, dest: str, branch: str = "main") -> None:
"""
Copy/clone repository from the given source to the destination directory.
Args:
source (str): The URL or path of the repository to clone.
dest (str): The directory where the repository will be cloned.
branch (str): The branch to checkout after cloning. Default is "main".
Returns:
str: The commit hash of the cloned repository.
"""
# Case `source` is a URL, we clone it.
if source.startswith("https://"):
Expand All @@ -39,18 +36,3 @@ def get_codebase(source: str, dest: str, branch: str = "main") -> str:
logger.debug(f"Copying repository from {source} to {dest}")
shutil.copytree(source_path, dest, dirs_exist_ok=True)

# Get the current commit hash if it's a git repository.
if os.path.exists(os.path.join(source_path, ".git")):
repo = git.Repo(source_path)
else:
# If not a repo, return a timestamp-based identifier.
logger.warning(
f"Source path {source} is not a git repository. Using timestamp as hash."
)
return utime.now_2_hash()
try:
code_version_id = repo.head.commit.hexsha
except Exception as e:
raise ValueError(f"Repository at {dest} has no commits yet: {e}") from e
logger.debug(f"Repository copied successfully with commit ID {code_version_id}")
return code_version_id
4 changes: 4 additions & 0 deletions src/hive_cli/utils/time.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,10 @@ def humanize_time(timestamp: str) -> str:
return age


def now_us() -> str:

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would suggest adding the git hash as well, since you never know when we might need it. The tag would look like "1772722212345678-a1b2c3d", using the first 7 characters of the hash. WDYT?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The dashboard already contains all the source code. I think it includes the .git directory. Either way, if we want the git commit, then we should make it part of the metadata visible from the dashboard.

If we add it, how does one know it's the git commit and not the random tag appended to (most) runs? Do we need to add -dirty? Should we hash the untracked files ourselves? What about git-ignored files?

Personally, I like that there's only one variation. If it's buggy it affects everyone equally and therefore gets spotted more reliably.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't really see the benefit, but I also don't see a real downside to adding it. So if it's a big plus for you, we can add it.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I forgot that the code is no longer tied to git, since you won't commit the change. Forget the suggestion, then.

return str(int(datetime.now(timezone.utc).timestamp() * 1_000_000))


def now_2_hash() -> str:
timestamp = str(int(datetime.now(timezone.utc).timestamp()))
unique_hash = hashlib.sha1(timestamp.encode()).hexdigest()[:7]
Expand Down
51 changes: 8 additions & 43 deletions tests/utils/test_git.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,20 +16,17 @@ def __init__(self, hexsha):


class _MockHead:
def __init__(self, hexsha=HEXSHA, raise_on_access=False):
def __init__(self, hexsha=HEXSHA):
self._hexsha = hexsha
self._raise = raise_on_access

@property
def commit(self):
if self._raise:
raise RuntimeError("no commit")
return _MockCommit(self._hexsha)


class _MockRepo:
def __init__(self, hexsha=HEXSHA, raise_on_access=False):
self.head = _MockHead(hexsha=hexsha, raise_on_access=raise_on_access)
def __init__(self, hexsha=HEXSHA):
self.head = _MockHead(hexsha=hexsha)
self.checked_out = None
self.git = types.SimpleNamespace(
checkout=lambda branch: setattr(self, "checked_out", branch)
Expand Down Expand Up @@ -92,7 +89,7 @@ def test_clone_url_without_token(monkeypatch, tmp_path, mock_git):
url = "https://github.com/org/repo.git"
dest = tmp_path / "dest"

hexsha = get_codebase(url, str(dest), branch="develop")
get_codebase(url, str(dest), branch="develop")

# clone_from called with original URL (no token injected)
called_url, called_dest = mock_git["clone_args"]
Expand All @@ -102,17 +99,14 @@ def test_clone_url_without_token(monkeypatch, tmp_path, mock_git):
# checkout called with the given branch
assert mock_git["repo"].checked_out == "develop"

# returned commit hash matches mock
assert hexsha == mock_git["repo"].head.commit.hexsha


def test_clone_url_with_token_injected(monkeypatch, tmp_path, mock_git):
monkeypatch.setenv("GITHUB_TOKEN", "SECRET123")

url = "https://github.com/org/repo.git"
dest = tmp_path / "dest"

_ = get_codebase(url, str(dest))
get_codebase(url, str(dest))

called_url, _ = mock_git["clone_args"]
assert called_url.startswith("https://x-access-token:SECRET123@")
Expand Down Expand Up @@ -141,48 +135,19 @@ def test_local_git_repo_copy(monkeypatch, tmp_path, mock_git, mock_copytree):

dest = tmp_path / "dest"

hexsha = get_codebase(str(src), str(dest))
get_codebase(str(src), str(dest))

# copytree was invoked with dirs_exist_ok=True
assert mock_copytree["args"] == (src.resolve(), dest, True)

# git.Repo was constructed with the SOURCE path (not dest)
assert Path(mock_git["repo_arg"]).resolve() == src.resolve()

# Returned hash is from the mock repo
assert hexsha == HEXSHA


def test_local_non_git_returns_timestamp(tmp_path, mock_copytree, caplog):
def test_local_non_git_copy(tmp_path, mock_copytree):
# No .git directory -> non-git path
src = tmp_path / "src"
src.mkdir()
dest = tmp_path / "dest"

result = get_codebase(str(src), str(dest))
get_codebase(str(src), str(dest))

# copytree still happens
assert mock_copytree["args"] == (src.resolve(), dest, True)

assert len(result) == 7

# warning logged (optional but nice to assert)
assert any("is not a git repository" in rec.getMessage() for rec in caplog.records)


def test_repo_without_commits_raises_valueerror(monkeypatch, tmp_path, mock_git):
import hive_cli.utils.git as target_module

# Make clone_from return a repo whose head.commit access raises
def bad_clone(url, dest, *args, **kwargs):
return _MockRepo(raise_on_access=True)

monkeypatch.delenv("GITHUB_TOKEN", raising=False)
monkeypatch.setattr(target_module.git.Repo, "clone_from", bad_clone, raising=True)

url = "https://github.com/org/repo.git"
dest = tmp_path / "dest"

with pytest.raises(ValueError) as exc:
target_module.get_codebase(url, str(dest))
assert "has no commits yet" in str(exc.value)
17 changes: 0 additions & 17 deletions tests/utils/test_time.py
Original file line number Diff line number Diff line change
@@ -1,23 +1,6 @@
import hashlib
from datetime import datetime, timezone
from unittest.mock import patch

from src.hive_cli.utils.time import now_2_hash


def test_now_2_hash():
fixed_timestamp = 1700000000
fixed_datetime = datetime.fromtimestamp(fixed_timestamp, tz=timezone.utc)

class FixedDateTime(datetime):
@classmethod
def now(cls, tz=None):
return fixed_datetime

with patch("src.hive_cli.utils.time.datetime", FixedDateTime):
expected_hash = hashlib.sha1(str(fixed_timestamp).encode()).hexdigest()[:7]
assert now_2_hash() == expected_hash


def test_humanize_time():
from src.hive_cli.utils.time import humanize_time
Expand Down
Loading