From 6aae4ef0ac85fdda712f2541c0cfdbb6f600a60b Mon Sep 17 00:00:00 2001 From: Ben Selwyn-Smith Date: Thu, 19 Dec 2024 13:17:40 +1000 Subject: [PATCH 1/2] fix: update already present repositories Signed-off-by: Ben Selwyn-Smith --- src/macaron/slsa_analyzer/git_url.py | 62 ++++++++++++++-------------- 1 file changed, 31 insertions(+), 31 deletions(-) diff --git a/src/macaron/slsa_analyzer/git_url.py b/src/macaron/slsa_analyzer/git_url.py index a516186ac..d5d466154 100644 --- a/src/macaron/slsa_analyzer/git_url.py +++ b/src/macaron/slsa_analyzer/git_url.py @@ -166,36 +166,13 @@ def check_out_repo_target( bool True if succeed else False. """ - if not offline_mode: - # Fetch from remote origin by running ``git fetch origin --force --tags --prune --prune-tags`` inside the target - # repository. - # The flags `--force --tags --prune --prune-tags` are used to make sure we analyze the most up-to-date version - # of the repo. - # - Any modified tags in the remote repository is updated locally. - # - Prune deleted branches and tags in the remote from the local repository. - # References: - # https://git-scm.com/docs/git-fetch - # https://github.com/oracle/macaron/issues/547 + if not offline_mode and not branch_name and not digest: try: - git_obj.repo.git.fetch( - "origin", - "--force", - "--tags", - "--prune", - "--prune-tags", - ) + git_obj.repo.git.checkout("--force", "origin/HEAD") except GitCommandError: - logger.error("Unable to fetch from the origin remote of the repository.") + logger.debug("Cannot checkout the default branch at origin/HEAD") return False - # By default check out the commit at origin/HEAD only when offline_mode is False. - if not branch_name and not digest: - try: - git_obj.repo.git.checkout("--force", "origin/HEAD") - except GitCommandError: - logger.debug("Cannot checkout the default branch at origin/HEAD") - return False - # The following checkout operations will be done whether offline_mode is False or not. 
if branch_name and not digest: try: @@ -337,11 +314,34 @@ def clone_remote_repo(clone_dir: str, url: str) -> Repo | None: os.rmdir(clone_dir) logger.debug("The clone dir %s is empty. It has been deleted for cloning the repo.", clone_dir) except OSError: - logger.debug( - "The clone dir %s is not empty. Cloning will not be proceeded.", - clone_dir, - ) - return None + # Update the existing repository by running ``git fetch`` inside the existing directory. + # The flags `--force --tags --prune --prune-tags` are used to make sure we analyze the most up-to-date + # version of the repo. + # - Any modified tags in the remote repository are updated locally. + # - Deleted branches and tags in the remote repository are pruned from the local copy. + # References: + # https://git-scm.com/docs/git-fetch + # https://github.com/oracle/macaron/issues/547 + try: + git_env_patch = { + # Setting the GIT_TERMINAL_PROMPT environment variable to ``0`` stops + # ``git fetch`` from prompting for login credentials. + "GIT_TERMINAL_PROMPT": "0", + } + subprocess.run( # nosec B603 + args=["git", "fetch", "origin", "--force", "--tags", "--prune", "--prune-tags"], + capture_output=True, + cwd=clone_dir, + # If `check=True` and return status code is not zero, subprocess.CalledProcessError is + # raised, which we don't want. We want to check the return status code of the subprocess + # later on. + check=False, + env=get_patched_env(git_env_patch), + ) + return Repo(path=clone_dir) + except (subprocess.CalledProcessError, OSError): + logger.debug("The clone dir %s is not empty. An attempt to update it failed.") + return None # Ensure that the parent directory where the repo is cloned into exists. 
parent_dir = Path(clone_dir).parent From e672b818c482706015fed8378bdd0498a8a2977c Mon Sep 17 00:00:00 2001 From: Ben Selwyn-Smith Date: Fri, 20 Dec 2024 12:26:19 +1000 Subject: [PATCH 2/2] chore: add integration test Signed-off-by: Ben Selwyn-Smith --- src/macaron/slsa_analyzer/git_url.py | 10 ++---- .../update_local_repositories/modify_clone.sh | 6 ++++ .../cases/update_local_repositories/policy.dl | 11 +++++++ .../cases/update_local_repositories/test.yaml | 31 +++++++++++++++++++ 4 files changed, 51 insertions(+), 7 deletions(-) create mode 100755 tests/integration/cases/update_local_repositories/modify_clone.sh create mode 100644 tests/integration/cases/update_local_repositories/policy.dl create mode 100644 tests/integration/cases/update_local_repositories/test.yaml diff --git a/src/macaron/slsa_analyzer/git_url.py b/src/macaron/slsa_analyzer/git_url.py index d5d466154..603188c5f 100644 --- a/src/macaron/slsa_analyzer/git_url.py +++ b/src/macaron/slsa_analyzer/git_url.py @@ -124,10 +124,6 @@ def check_out_repo_target( This function assumes that a remote "origin" exist and checkout from that remote ONLY. - If ``offline_mode`` is False, this function will fetch new changes from origin remote. The fetching operation - will prune and update all references (e.g. tags, branches) to make sure that the local repository is up-to-date - with the repository specified by origin remote. - If ``offline_mode`` is True and neither ``branch_name`` nor commit are provided, this function will not do anything and the HEAD commit will be analyzed. If there are uncommitted local changes, the HEAD commit will appear in the report but the repo with local changes will be analyzed. We leave it up to the user to decide @@ -277,9 +273,9 @@ def clone_remote_repo(clone_dir: str, url: str) -> Repo | None: """Clone the remote repository and return the `git.Repo` object for that repository. 
If there is an existing non-empty ``clone_dir``, Macaron assumes the repository has - been cloned already and cancels the clone. - This could happen when multiple runs of Macaron use the same ``, leading - to Macaron potentially trying to clone a repository multiple times. + been cloned already and will attempt to fetch the latest changes. The fetching operation + will prune and update all references (e.g. tags, branches) to make sure that the local + repository is up-to-date with the repository specified by origin remote. We use treeless partial clone to reduce clone time, by retrieving trees and blobs lazily. For more details, see the following: diff --git a/tests/integration/cases/update_local_repositories/modify_clone.sh b/tests/integration/cases/update_local_repositories/modify_clone.sh new file mode 100755 index 000000000..a2d32ebb5 --- /dev/null +++ b/tests/integration/cases/update_local_repositories/modify_clone.sh @@ -0,0 +1,6 @@ +#!/bin/bash +# Copyright (c) 2024 - 2024, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. + +cd output/git_repos/github_com/avaje/avaje-prisms +git tag --delete avaje-prisms-1.1 diff --git a/tests/integration/cases/update_local_repositories/policy.dl b/tests/integration/cases/update_local_repositories/policy.dl new file mode 100644 index 000000000..b51b37ca8 --- /dev/null +++ b/tests/integration/cases/update_local_repositories/policy.dl @@ -0,0 +1,11 @@ +/* Copyright (c) 2024 - 2024, Oracle and/or its affiliates. All rights reserved. */ +/* Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. */ + +#include "prelude.dl" + +Policy("test_policy", component_id, "") :- + check_passed(component_id, "mcn_version_control_system_1"), + is_repo_url(component_id, "https://github.com/avaje/avaje-prisms"). 
+ +apply_policy_to("test_policy", component_id) :- + is_component(component_id, "pkg:maven/io.avaje/avaje-prisms@1.1"). diff --git a/tests/integration/cases/update_local_repositories/test.yaml b/tests/integration/cases/update_local_repositories/test.yaml new file mode 100644 index 000000000..242d23c76 --- /dev/null +++ b/tests/integration/cases/update_local_repositories/test.yaml @@ -0,0 +1,31 @@ +# Copyright (c) 2024 - 2024, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. + +description: | + Ensuring previously cloned repositories are updated when newer changes are available at their remote origins. + +tags: +- macaron-python-package +- macaron-docker-image + +steps: +- name: Analyze a repository + kind: analyze + options: + command_args: + - -purl + - pkg:maven/io.avaje/avaje-prisms@1.1 +- name: Delete the chosen tag from the repository + kind: shell + options: + cmd: ./modify_clone.sh +- name: Analyze the repository again + kind: analyze + options: + command_args: + - -purl + - pkg:maven/io.avaje/avaje-prisms@1.1 +- name: Run macaron verify-policy to verify version control check which will only pass if the tag is found + kind: verify + options: + policy: policy.dl