Skip to content

Commit

Permalink
chore: use a new design for check_out_repo_target
Browse files Browse the repository at this point in the history
* Remove git pull and use the new design where the branch is no longer used for the checkout.
* Update some integration tests where a tag was used as the branch, which is not correct.
* Implement a function to find all remote branches that contain a commit.

Signed-off-by: Trong Nhan Mai <[email protected]>
  • Loading branch information
tromai committed Nov 10, 2023
1 parent fe81a0c commit ce3dda7
Show file tree
Hide file tree
Showing 6 changed files with 209 additions and 83 deletions.
203 changes: 146 additions & 57 deletions src/macaron/slsa_analyzer/git_url.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,22 +25,126 @@
logger: logging.Logger = logging.getLogger(__name__)


def check_out_repo_target(git_obj: Git, branch_name: str = "", digest: str = "", offline_mode: bool = False) -> bool:
def parse_git_branch_output(content: str) -> list[str]:
    r"""Return the list of branch names from the output of ``git branch``.

    This function only works for the output format of the first form of ``git branch``, where the
    branches are listed one per line.
    See https://git-scm.com/docs/git-branch for the definition of the first form of ``git branch``.

    Parameters
    ----------
    content : str
        The raw output, as a string, from the ``git branch`` command.

    Returns
    -------
    list[str]
        One string per non-blank line of ``content``, in the original order, with the ``*``
        (current HEAD) or leading ``+`` (checked out in a linked worktree) marker and the
        surrounding whitespace removed.

    Examples
    --------
    >>> content = "* (HEAD detached at 7fc81f8)\n  master\n  remotes/origin/HEAD -> origin/master\n"
    >>> parse_git_branch_output(content)
    ['(HEAD detached at 7fc81f8)', 'master', 'remotes/origin/HEAD -> origin/master']
    >>> parse_git_branch_output("+ feature\n  remotes/origin/v2.dev\n")
    ['feature', 'remotes/origin/v2.dev']
    """
    result = []
    for line in content.split("\n"):
        # Skip lines that contain only whitespace. The raw output of ``git branch`` can end with an
        # extra new line, which would otherwise show up as an empty element.
        if not line.strip():
            continue

        entry = line
        # ``git branch`` prefixes a branch that is checked out in a linked worktree with ``+ ``.
        # ``+`` is a valid character inside a branch name, so only the line prefix is removed. A real
        # branch name can never match this prefix because branch names cannot contain spaces.
        if entry.startswith("+ "):
            entry = entry[2:]

        # The ``*`` symbol appears next to the branch that HEAD is currently on.
        # Branches in git cannot have ``*`` in their names, so it is safe to drop every occurrence.
        # https://git-scm.com/docs/git-check-ref-format
        result.append(entry.replace("*", "").strip())

    return result


def get_branches_for_commit(git_obj: Git, commit: str, remote: str = "origin") -> list[str]:
    """Get the branches of a remote that contain a specific commit.

    The lookup runs ``git branch --remotes --list <remote>/* --contains <commit>`` in the target
    repository and parses its output with ``parse_git_branch_output``.
    The returned branch names will be in the form of <remote>/<branch_name>.

    Parameters
    ----------
    git_obj : Git
        The pydriller.Git wrapper object of the target repository.
    commit : str
        The hash of the commit whose containing branches are wanted.
    remote : str, optional
        The name of the remote whose branches are checked, by default "origin".

    Returns
    -------
    list[str]
        The list of branches that contain the commit. An empty list is returned if the
        ``git branch`` command fails.
    """
    # Restrict the listing to remote-tracking branches of ``remote`` that contain ``commit``.
    branch_args = ("--remotes", "--list", f"{remote}/*", "--contains", commit)

    try:
        listing: str = git_obj.repo.git.branch(*branch_args)
    except GitCommandError:
        logger.debug("Error while using git branch to look for branches that contains %s.", commit)
        return []

    return parse_git_branch_output(listing)


def check_out_repo_target(
git_obj: Git,
branch_name: str = "",
digest: str = "",
offline_mode: bool = False,
) -> bool:
"""Checkout the branch and commit specified by the user.
If no branch name is provided, this method will checkout the default branch
of the repository and analyze the latest commit from remote. Note that checking out the branch
is always performed before checking out the specific ``digest`` (if provided).
This function assumes that a remote "origin" exists and checks out from that remote ONLY.
If ``digest`` is not provided, this method always pulls (fast-forward only) and checks out the latest commit.
If ``offline_mode`` is False, this function will fetch new changes from origin remote. The fetching operation
will prune and update all references (e.g. tags, branches) to make sure that the local repository is up-to-date
with the repository specified by origin remote.
If ``digest`` is provided, this method will checkout that specific commit. If ``digest``
cannot be found in the current branch, this method will pull (fast-forward only) from remote.
If ``branch_name`` and a commit are not provided, this function will checkout the latest commit of the
default branch (i.e. origin/HEAD).
This method supports repositories which are cloned from existing remote repositories.
Other scenarios are not covered (e.g. a newly initiated repository).
If ``branch_name`` is provided and a commit is not provided, this function will checkout that branch from origin
remote (i.e. origin/<branch_name>).
If ``offline_mode`` is set, this method will not pull/fetch from remote while checking out the branch or commit.
If ``branch_name`` is not provided and a commit is provided, this function will checkout the commit directly.
If both ``branch_name`` and a commit are provided, this function will checkout the commit directly only if that
commit exists in the branch origin/<branch_name>. If not, this function will return False.
For all scenarios:
- If the checkout failed (e.g. a branch or a commit doesn't exist), this function will return
False.
- This function will perform a force checkout, which mean it will reset the index and working tree of the target
repository.
This function supports repositories which are cloned from existing remote repositories.
Other scenarios are not covered (e.g. a newly initiated repository).
Parameters
----------
Expand All @@ -58,17 +162,6 @@ def check_out_repo_target(git_obj: Git, branch_name: str = "", digest: str = "",
bool
True if succeed else False.
"""
# Resolve the branch name to check out.
res_branch = ""
if branch_name:
res_branch = branch_name
else:
res_branch = get_default_branch(git_obj)
if not res_branch:
logger.error("Cannot determine the default branch for this repository.")
logger.info("Consider providing the specific branch to be analyzed or fully cloning the repo instead.")
return False

if not offline_mode:
# Fetch from remote origin by running ``git fetch origin --force --tags --prune --prune-tags`` inside the target
# repository.
Expand All @@ -91,49 +184,45 @@ def check_out_repo_target(git_obj: Git, branch_name: str = "", digest: str = "",
logger.error("Unable to fetch from the origin remote of the repository.")
return False

try:
# Switch to the target branch by running ``git checkout <branch_name>`` in the target repository.
# We need to use force checkout to prevent issues similar to https://github.com/oracle/macaron/issues/530.
git_obj.repo.git.checkout("--force", res_branch)
except GitCommandError:
logger.error("Cannot checkout branch %s.", res_branch)
return False
if not branch_name and not digest:
try:
git_obj.repo.git.checkout("--force", "origin/HEAD")
except GitCommandError:
logger.debug("Cannot checkout the default branch at origin/HEAD")
return False

logger.info("Successfully checkout branch %s.", res_branch)
if branch_name and not digest:
try:
git_obj.repo.git.checkout("--force", f"origin/{branch_name}")
except GitCommandError:
logger.debug("Cannot checkout branch %s from origin remote.", branch_name)
return False

if not offline_mode:
# We only pull the latest changes if one of these scenarios happens:
# - no digest is provided: we need to pull and analyze the latest commit.
# - a commit digest is provided but it does not exist locally: we need to
# pull the latest changes to check if that commit is available.
# We want to check if the commit already exist locally first because we want to avoid pulling unecessary
# if it does.
# We do this by checking if the commit we want to analyze is an ancestor of the commit being referenced by HEAD
# (which point to the tip of the branch).
# If the commit we want to analyze is same as HEAD, that commit is still considered as the ancestor of HEAD.
# The ``is_ancestor`` method runs ``git merge-base`` behind the scence.
# For more information on computing the ancestor status of two commits: https://git-scm.com/docs/git-merge-base.
if not digest or not git_obj.repo.is_ancestor(digest, "HEAD"):
logger.info("Pulling the latest changes of branch %s fast-forward only.", res_branch)
if not branch_name and digest:
try:
git_obj.repo.git.checkout("--force", f"{digest}")
except GitCommandError:
logger.debug("Cannot checkout commit %s.", digest)
return False

if branch_name and digest:
branches = get_branches_for_commit(
git_obj=git_obj,
commit=digest,
remote="origin",
)

if f"origin/{branch_name}" in branches:
try:
# Pull the latest changes on the current branch fast-forward only.
git_obj.repo.git.pull("--ff-only")
git_obj.repo.git.checkout("--force", f"{digest}")
except GitCommandError:
logger.error("Cannot pull the latest changes.")
logger.debug("Cannot checkout commit %s.", digest)
return False

if digest:
# Checkout the specific commit that the user want by running ``git checkout <commit>`` in the target repository.
# We need to use force checkout to prevent issues similar to https://github.com/oracle/macaron/issues/530.
try:
git_obj.repo.git.checkout("--force", digest)
except GitCommandError:
logger.error(
"Commit %s cannot be checked out.",
digest,
)
else:
logger.error("Commit %s is not in branch %s.", digest, branch_name)
return False

# Further validation to make sure the git checkout operations happen as expected.
final_head_commit: Commit = git_obj.repo.head.commit
if not final_head_commit:
logger.critical("Cannot get the head commit after checking out.")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,13 @@ dependencies:
- id: slf4j
# For version 1.7.36
# https://github.com/qos-ch/slf4j/commit/e9ee55cca93c2bf26f14482a9bdf961c750d2a56
branch: v_1.7.36
branch: 1.7-maintenance
digest: e9ee55cca93c2bf26f14482a9bdf961c750d2a56
path: https://github.com/qos-ch/slf4j.git

- id: caffeine
# For version 2.9.3
# https://github.com/ben-manes/caffeine/commit/05a040c2478341bab8a58a02b3dc1fe14d626d72
branch: v2.9.3
branch: v2.dev
digest: 05a040c2478341bab8a58a02b3dc1fe14d626d72
path: https://github.com/ben-manes/caffeine.git
4 changes: 2 additions & 2 deletions tests/e2e/configurations/micronaut_core_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,13 @@ dependencies:
- id: slf4j
# For version 1.7.36
# https://github.com/qos-ch/slf4j/commit/e9ee55cca93c2bf26f14482a9bdf961c750d2a56
branch: v_1.7.36
branch: 1.7-maintenance
digest: e9ee55cca93c2bf26f14482a9bdf961c750d2a56
path: https://github.com/qos-ch/slf4j.git

- id: caffeine
# For version 2.9.3
# https://github.com/ben-manes/caffeine/commit/05a040c2478341bab8a58a02b3dc1fe14d626d72
branch: v2.9.3
branch: v2.dev
digest: 05a040c2478341bab8a58a02b3dc1fe14d626d72
path: https://github.com/ben-manes/caffeine.git
23 changes: 12 additions & 11 deletions tests/e2e/expected_results/micronaut-core/caffeine.json
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
{
"metadata": {
"timestamps": "2023-09-12 22:55:15"
"timestamps": "2023-11-10 14:01:43",
"has_passing_check": true
},
"target": {
"info": {
"full_name": "pkg:github.com/ben-manes/caffeine@05a040c2478341bab8a58a02b3dc1fe14d626d72",
"local_cloned_path": "git_repos/github_com/ben-manes/caffeine",
"remote_path": "https://github.com/ben-manes/caffeine",
"branch": "v2.9.3",
"branch": "v2.dev",
"commit_hash": "05a040c2478341bab8a58a02b3dc1fe14d626d72",
"commit_date": "2021-12-02T01:04:44-08:00"
},
Expand All @@ -26,7 +27,7 @@
"buildType": "Custom github_actions",
"invocation": {
"configSource": {
"uri": "https://github.com/ben-manes/caffeine@refs/heads/v2.9.3",
"uri": "https://github.com/ben-manes/caffeine@refs/heads/v2.dev",
"digest": {
"sha1": "05a040c2478341bab8a58a02b3dc1fe14d626d72"
},
Expand Down Expand Up @@ -189,7 +190,7 @@
"Provenance content - Identifies builder - SLSA Level 1"
],
"justification": [
"Could not find any SLSA provenances."
"Could not find any SLSA or Witness provenances."
],
"result_type": "FAILED"
},
Expand Down Expand Up @@ -254,39 +255,39 @@
"unique_dep_repos": 0,
"checks_summary": [
{
"check_id": "mcn_provenance_expectation_1",
"check_id": "mcn_version_control_system_1",
"num_deps_pass": 0
},
{
"check_id": "mcn_provenance_witness_level_one_1",
"num_deps_pass": 0
},
{
"check_id": "mcn_provenance_available_1",
"check_id": "mcn_build_as_code_1",
"num_deps_pass": 0
},
{
"check_id": "mcn_infer_artifact_pipeline_1",
"num_deps_pass": 0
},
{
"check_id": "mcn_build_as_code_1",
"check_id": "mcn_trusted_builder_level_three_1",
"num_deps_pass": 0
},
{
"check_id": "mcn_version_control_system_1",
"check_id": "mcn_provenance_level_three_1",
"num_deps_pass": 0
},
{
"check_id": "mcn_trusted_builder_level_three_1",
"check_id": "mcn_build_script_1",
"num_deps_pass": 0
},
{
"check_id": "mcn_build_script_1",
"check_id": "mcn_provenance_expectation_1",
"num_deps_pass": 0
},
{
"check_id": "mcn_provenance_level_three_1",
"check_id": "mcn_provenance_available_1",
"num_deps_pass": 0
},
{
Expand Down
Loading

0 comments on commit ce3dda7

Please sign in to comment.