from __future__ import annotations
import dataclasses
import json
import logging
import re
import subprocess
import warnings
from dataclasses import dataclass
from functools import cache, cached_property
from pathlib import Path
import requests
from git import InvalidGitRepositoryError
from git.refs import Head
from git.repo import Repo
from . import utils
from .aliases import PathOrStr
from .exceptions import *
# Names exported by ``from <this module> import *``.
__all__ = ["GitRepoState"]
# Module-level logger, named after this module.
log = logging.getLogger(__name__)
# Matches a string made up only of ASCII letters, digits, ".", "_" and "-".
# Used to validate the two halves of an "account/repo" shorthand remote.
_GITHUB_NAME_RE = re.compile(r"^[a-zA-Z0-9._-]+$")
def _parse_git_remote_url(url: str) -> tuple[str, str]:
"""
Parse a git remote URL into a GitHub (account, repo) pair.
:raises InvalidRemoteError: If the URL can't be parsed correctly.
"""
if "github.com" not in url:
if url.count("/") == 1:
account, repo = url.split("/")
if _GITHUB_NAME_RE.match(account) and _GITHUB_NAME_RE.match(repo):
return account, repo
raise InvalidRemoteError(f"Remote ('{url}') must point to a GitHub repo")
try:
account, repo = url.split("github.com", 1)[-1].strip("/:").split(".git")[0].split("/")
except ValueError:
raise InvalidRemoteError(f"Failed to parse GitHub repo path from remote '{url}'")
return account, repo
@cache
def _resolve_repo() -> Repo:
    """
    Open the git repository rooted at the current working directory.

    The result is memoized with :func:`functools.cache`, so repeated calls reuse the same
    ``Repo`` object.

    :returns: The ``Repo`` for ``"."``.
    :raises GitError: If the current directory is not a valid git repository.
    """
    try:
        local_repo = Repo(".")
    except InvalidGitRepositoryError as err:
        message = (
            f"gantry must be run from the ROOT of a valid git repository "
            f"unless {utils.fmt_opt('--remote')} is provided to use a remote repository."
        )
        raise GitError(message) from err
    else:
        return local_repo
@dataclass
class GitRepoState:
    """
    Represents the state of a local git repository.

    .. tip::
        Use :meth:`from_env()` to instantiate this class.
    """

    repo: str
    """
    The repository name, e.g. ``"allenai/beaker-gantry"``.
    """

    repo_url: str
    """
    The repository URL for cloning, e.g. ``"https://github.com/allenai/beaker-gantry"``.
    """

    ref: str
    """
    The current commit ref/SHA.
    """

    branch: str | None = None
    """
    The current active branch, if any.
    """

    # Set to ``True`` by :meth:`from_remote()`. When set, properties that would otherwise
    # inspect the local checkout short-circuit instead.
    _is_remote: bool = dataclasses.field(repr=False, default=False)

    @property
    def is_dirty(self) -> bool:
        """
        If the local repository state is dirty (uncommitted changes).

        Always ``False`` when this state was created from a remote repository.
        """
        if self._is_remote:
            return False
        return _resolve_repo().is_dirty()

    @cached_property
    def is_public(self) -> bool:
        """
        If the repository is public.

        Determined by a GET request to :data:`repo_url`: a 200 response is treated as public and
        a 404 as not public. The result is cached on the instance.

        :raises requests.HTTPError: For any other error status code.
        :raises requests.Timeout: If the server does not respond in time.
        """
        # Always pass a timeout: without one, ``requests`` can block indefinitely on a stalled
        # connection.
        response = requests.get(self.repo_url, timeout=10)
        if response.status_code not in {200, 404}:
            response.raise_for_status()
        return response.status_code == 200

    @property
    def short_ref(self) -> str:
        """
        Short, 7-character version of the current :data:`ref`.

        Only a full 40-character hexadecimal SHA is shortened; any other ref (e.g. a branch or
        tag name, or an already abbreviated SHA) is returned unchanged.
        """
        # Check for hex digits specifically, so a 40-character alphanumeric branch/tag name
        # is not mistaken for a SHA and truncated.
        if re.fullmatch(r"[0-9a-fA-F]{40}", self.ref):
            return self.ref[:7]
        return self.ref

    @property
    def ref_url(self) -> str:
        """
        The URL to the current :data:`ref`.
        """
        return f"{self.repo_url}/commit/{self.short_ref}"

    @property
    def branch_url(self) -> str | None:
        """
        The URL to the current active :data:`branch`, or ``None`` if there is no branch.
        """
        if self.branch is None:
            return None
        return f"{self.repo_url}/tree/{self.branch}"

    @cached_property
    def commit_message(self) -> str | None:
        """
        Full commit message.

        ``None`` for remote repositories, or if the message can't be read from the local
        repository. The result is cached on the instance.
        """
        if self._is_remote:
            return None
        repo = _resolve_repo()
        try:
            return str(repo.commit(self.ref).message)
        except Exception:
            # Deliberately best-effort: the message is informational only, so any failure
            # to look up the commit is reported as "no message".
            return None

    def short_commit_message(self, max_length: int = 50) -> str | None:
        """
        The commit message, truncated to ``max_length`` characters.

        Only the first line of the message is used. When it is longer than ``max_length`` it is
        cut and suffixed with an ellipsis character.

        :param max_length: Maximum length of the returned string.
        :returns: The (possibly truncated) first line, or ``None`` if there is no commit message.
        """
        if self.commit_message is None:
            return None
        msg = self.commit_message.split("\n")[0].strip()
        if len(msg) <= max_length:
            return msg
        # Reserve one character for the ellipsis.
        return msg[: max_length - 1] + "…"

    def is_in_tree(self, path: PathOrStr) -> bool:
        """
        Check if a file is in the tree.

        For local repositories ``path`` is resolved relative to the current working directory
        and looked up in the tree of :data:`ref`. For remote repositories the GitHub CLI is
        used to query the repository contents at :data:`ref`.

        :param path: The path to check.
        :returns: ``True`` if the path exists in the tree at :data:`ref`, otherwise ``False``.
        :raises ConfigurationError: If the GitHub CLI is needed but not installed.
        :raises subprocess.CalledProcessError: If the GitHub CLI call fails for a reason other
            than a 404.
        """
        if not self._is_remote:
            try:
                rel_path = Path(path).resolve().relative_to(Path("./").resolve())
            except ValueError:
                # The path is outside of the working directory, so it can't be part of the tree.
                return False
            tree = _resolve_repo().commit(self.ref).tree
            try:
                # ``Tree.join`` walks nested paths and raises ``KeyError`` when the entry is
                # missing. Git tree paths always use "/" separators, hence ``as_posix()``.
                tree.join(rel_path.as_posix())
            except KeyError:
                return False
            return True

        try:
            res = subprocess.run(
                [
                    "gh",
                    "api",
                    f"repos/{self.repo}/contents/{path}?ref={self.ref}",
                    "--jq",
                    ".name",
                ],
                capture_output=True,
                text=True,
            )
        except FileNotFoundError as e:
            raise ConfigurationError(
                f"Attempted to use the GitHub CLI to pull metadata about the remote repo '{self.repo}', "
                f"however it appears that it's not installed. "
                f"Please ensure the GitHub CLI is installed and try again."
            ) from e

        try:
            res.check_returncode()
        except subprocess.CalledProcessError:
            # On failure, stdout is expected to hold a JSON error body with a "status" field.
            # A 404 simply means the file doesn't exist; anything else is a real error.
            try:
                status = int(json.loads(res.stdout)["status"])
            except (ValueError, KeyError, TypeError):
                status = None
            if status == 404:
                return False
            raise
        return True

    @classmethod
    def from_remote(
        cls, remote_url: str, ref: str | None = None, branch: str | None = None
    ) -> GitRepoState:
        """
        Instantiate this class from a remote repository.

        If ``ref`` is not given, the SHA of the latest commit on ``branch`` is looked up with
        the GitHub CLI.

        :param remote_url: The remote URL, or an ``"account/repo"`` shorthand.
        :param ref: The commit ref/SHA to use.
        :param branch: The branch to use. Required when ``ref`` is not given.
        :raises InvalidRemoteError: If ``remote_url`` can't be parsed.
        :raises ConfigurationError: If neither ``ref`` nor ``branch`` is given, or if the
            latest commit can't be determined with the GitHub CLI.
        """
        account, repo_name = _parse_git_remote_url(remote_url)

        if ref is None:
            if branch is None:
                raise ConfigurationError(
                    f"Either {utils.fmt_opt('--ref')} or {utils.fmt_opt('--branch')} is required "
                    f"when using remote repositories."
                )

            try:
                res = subprocess.run(
                    [
                        "gh",
                        "api",
                        f"repos/{account}/{repo_name}/commits",
                        "-f",
                        f"sha={branch}",
                        "--method",
                        "GET",
                        "--jq",
                        ".[0].sha",
                    ],
                    capture_output=True,
                    text=True,
                )
            except FileNotFoundError as e:
                raise ConfigurationError(
                    f"Since {utils.fmt_opt('--ref')} was not provided, attempted to determine the SHA of the latest "
                    f"commit automatically using the GitHub CLI, however it appears that it's not installed. "
                    f"Please provide a {utils.fmt_opt('--ref')} (SHA of a commit to use) or ensure the GitHub CLI is installed."
                ) from e

            try:
                res.check_returncode()
            except subprocess.CalledProcessError as e:
                raise ConfigurationError(
                    f"Since {utils.fmt_opt('--ref')} was not provided, attempted to determine the SHA of the latest "
                    f"commit automatically using the GitHub CLI, however this failed with:\n"
                    f"{res.stderr}\n"
                    f"You can avoid this issue by providing a {utils.fmt_opt('--ref')} (SHA of a commit to use)."
                ) from e

            ref = res.stdout.strip()
            if not ref:
                # A successful call with no output would otherwise yield a state with an empty ref.
                raise ConfigurationError(
                    f"Since {utils.fmt_opt('--ref')} was not provided, attempted to determine the SHA of the latest "
                    f"commit automatically using the GitHub CLI, however no commit was found for branch '{branch}'.\n"
                    f"You can avoid this issue by providing a {utils.fmt_opt('--ref')} (SHA of a commit to use)."
                )

        return cls(
            repo=f"{account}/{repo_name}",
            repo_url=f"https://github.com/{account}/{repo_name}",
            ref=ref,
            branch=branch,
            _is_remote=True,
        )

    @classmethod
    def from_env(cls, ref: str | None = None, branch: str | None = None) -> GitRepoState:
        """
        Instantiate this class from the root of a git repository.

        :param ref: The commit ref/SHA to use. Defaults to the current commit.
        :param branch: The branch to use. Defaults to the branch tracked by the active local
            branch, if any.
        :raises ~gantry.exceptions.GitError: If this method isn't called from the
            root of a valid git repository.
        :raises ~gantry.exceptions.UnpushedChangesError: If there are unpushed commits.
        """
        from .beaker_utils import is_running_in_gantry_batch_job

        repo = _resolve_repo()
        git_ref = ref or str(repo.commit())
        remote = repo.remote()
        account, repo_name = _parse_git_remote_url(remote.url)

        # Check if the ref exists on the remote (if not, it's likely that there are unpushed commits).
        try:
            res = subprocess.run(
                [
                    "gh",
                    "api",
                    f"repos/{account}/{repo_name}/commits/{git_ref}",
                    "--jq={sha: .sha, status: .status}",
                ],
                capture_output=True,
                text=True,
            )
        except FileNotFoundError:
            # The check is best-effort: without the GitHub CLI we only warn and carry on.
            msg = (
                "Attempted to use the GitHub CLI to validate that the commit exists on the remote, "
                "however it appears that it's not installed. "
                "Please ensure the GitHub CLI is installed to avoid this warning message."
            )
            if utils.is_cli_mode():
                utils.print_stderr(f"[yellow]{msg}[/]")
            elif not is_running_in_gantry_batch_job():
                warnings.warn(msg, UserWarning)
        else:
            try:
                output = json.loads(res.stdout)
            except json.JSONDecodeError:
                output = {}
            if not isinstance(output, dict):
                # Anything other than a JSON object is handled as an unexpected response below.
                output = {}

            if not (output.get("sha") or "").startswith(git_ref):  # in case the ref is abbreviated
                if str(output.get("status")) in {"404", "422"}:
                    raise UnpushedChangesError(
                        f"Current git ref '{git_ref}' does not appear to exist on the remote!\n"
                        "Please push your changes and try again."
                    )
                else:
                    msg = (
                        f"Unexpected response from the GitHub API while validating git ref '{git_ref}' on remote:\n"
                        f"{res.stdout}\n{res.stderr}"
                    )
                    warnings.warn(msg, RuntimeWarning)

        # Resolve branch.
        branch_name: str | None = branch
        if branch_name is None:
            active_branch: Head | None = None
            try:
                active_branch = repo.active_branch
            except TypeError:
                # No active branch could be determined; leave the branch unset.
                pass
            else:
                remote_branch = active_branch.tracking_branch()
                if remote_branch is not None:
                    # Strip the "<remote>/" prefix, e.g. "origin/main" -> "main".
                    # NOTE(review): the tracking branch may live on a different remote than the
                    # default one used for ``repo``/``repo_url`` above - confirm whether the
                    # repo should follow the tracking remote instead.
                    branch_name = remote_branch.name.removeprefix(remote_branch.remote_name + "/")

        return cls(
            repo=f"{account}/{repo_name}",
            repo_url=f"https://github.com/{account}/{repo_name}",
            ref=git_ref,
            branch=branch_name,
        )