Skip to content

Commit

Permalink
refactor!: allow the branch name in the schema of a repository to be null (#532)
Browse files Browse the repository at this point in the history

This PR changes the Repository schema in the data model to allow a branch name to be None. This is in preparation for mapping artifacts to commits, because sometimes we might not be able to find a branch. However, we should still be able to check out the repository using the resolved commit hash.

Note that the local `macaron.db` needs to be removed to apply this change in the data model, hence the breaking-change marker (`!`) in the PR title.

Signed-off-by: behnazh-w <[email protected]>
  • Loading branch information
behnazh-w authored Oct 31, 2023
1 parent ba3b617 commit 1b7740b
Show file tree
Hide file tree
Showing 16 changed files with 305 additions and 261 deletions.
467 changes: 256 additions & 211 deletions docs/source/assets/er-diagram.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
2 changes: 1 addition & 1 deletion src/macaron/database/table_definitions.py
Original file line number Diff line number Diff line change
Expand Up @@ -273,7 +273,7 @@ class Repository(ORMBase):
remote_path: Mapped[str] = mapped_column(String, nullable=False)

#: The branch name.
branch_name: Mapped[str] = mapped_column(String, nullable=False)
branch_name: Mapped[str | None] = mapped_column(String, nullable=True)

#: The release tag.
release_tag: Mapped[str] = mapped_column(String, nullable=True)
Expand Down
11 changes: 5 additions & 6 deletions src/macaron/slsa_analyzer/analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -308,15 +308,15 @@ def run_single(
context=analyze_ctx,
)

def add_repository(self, branch_name: str, git_obj: Git) -> Repository | None:
def add_repository(self, branch_name: str | None, git_obj: Git) -> Repository | None:
"""Create a repository instance for a target repository.
The repository instances are transient objects for SQLAlchemy, which may be
added to the database ultimately.
Parameters
----------
branch_name : str
branch_name : str | None
The name of the branch that we are analyzing.
We need this because when the target repository is in a detached state,
the current branch name cannot be determined.
Expand All @@ -342,7 +342,7 @@ def add_repository(self, branch_name: str, git_obj: Git) -> Repository | None:

logger.info("The complete name of this repository is %s", complete_name)

res_branch = ""
res_branch = None

if branch_name:
res_branch = branch_name
Expand All @@ -353,9 +353,8 @@ def add_repository(self, branch_name: str, git_obj: Git) -> Repository | None:
# HEAD is a detached symbolic reference. This happens when we checkout a commit.
# However, it shouldn't happen as we don't allow specifying a commit digest without
# a branch in the config.
logger.critical("The HEAD of the repo does not point to any branch.")
logger.error(err)
res_branch = ""
logger.debug("The HEAD of the repo does not point to any branch: %s.", err)
res_branch = None

# Get the head commit.
# This is the commit that Macaron will run the analysis on.
Expand Down
8 changes: 4 additions & 4 deletions src/macaron/slsa_analyzer/ci_service/base_ci_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,7 @@ def has_kws_in_config(self, kws: list, repo_path: str) -> tuple[str, str]:
return "", ""

def has_latest_run_passed(
self, repo_full_name: str, branch_name: str, commit_sha: str, commit_date: str, workflow: str
self, repo_full_name: str, branch_name: str | None, commit_sha: str, commit_date: str, workflow: str
) -> str:
"""Get the latest run of a workflow in the repository.
Expand All @@ -180,7 +180,7 @@ def has_latest_run_passed(
----------
repo_full_name : str
The target repo's full name.
branch_name : str
branch_name : str | None
The target branch.
commit_sha : str
The commit sha of the target repo.
Expand Down Expand Up @@ -297,7 +297,7 @@ def extract_all_bash(self, callgraph: CallGraph, macaron_path: str = "") -> Iter
return []

def has_latest_run_passed(
self, repo_full_name: str, branch_name: str, commit_sha: str, commit_date: str, workflow: str
self, repo_full_name: str, branch_name: str | None, commit_sha: str, commit_date: str, workflow: str
) -> str:
"""Get the latest run of a workflow in the repository.
Expand All @@ -308,7 +308,7 @@ def has_latest_run_passed(
----------
repo_full_name : str
The target repo's full name.
branch_name : str
branch_name : str | None
The target branch.
commit_sha : str
The commit sha of the target repo.
Expand Down
6 changes: 3 additions & 3 deletions src/macaron/slsa_analyzer/ci_service/circleci.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2022 - 2022, Oracle and/or its affiliates. All rights reserved.
# Copyright (c) 2022 - 2023, Oracle and/or its affiliates. All rights reserved.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.

"""This module analyzes Circle CI."""
Expand Down Expand Up @@ -79,7 +79,7 @@ def extract_all_bash(self, callgraph: CallGraph, macaron_path: str = "") -> Iter
return []

def has_latest_run_passed(
self, repo_full_name: str, branch_name: str, commit_sha: str, commit_date: str, workflow: str
self, repo_full_name: str, branch_name: str | None, commit_sha: str, commit_date: str, workflow: str
) -> str:
"""Get the latest run of a workflow in the repository.
Expand All @@ -90,7 +90,7 @@ def has_latest_run_passed(
----------
repo_full_name : str
The target repo's full name.
branch_name : str
branch_name : str | None
The target branch.
commit_sha : str
The commit sha of the target repo.
Expand Down
36 changes: 17 additions & 19 deletions src/macaron/slsa_analyzer/ci_service/github_actions.py
Original file line number Diff line number Diff line change
Expand Up @@ -294,7 +294,7 @@ def extract_all_bash(self, callgraph: CallGraph, macaron_path: str = "") -> Iter
)

def has_latest_run_passed(
self, repo_full_name: str, branch_name: str, commit_sha: str, commit_date: str, workflow: str
self, repo_full_name: str, branch_name: str | None, commit_sha: str, commit_date: str, workflow: str
) -> str:
"""Check if the latest run of ``workflow`` on commit ``commit_sha`` is passing.
Expand All @@ -306,7 +306,7 @@ def has_latest_run_passed(
----------
repo_full_name : str
The target repo's full name.
branch_name : str
branch_name : str | None
The target branch.
commit_sha : str
The commit sha of the target repo.
Expand All @@ -320,7 +320,7 @@ def has_latest_run_passed(
str
The URL for the passing workflow run, or an empty string if no passing GitHub Actions build workflow is found.
"""
logger.info("Getting the latest workflow run of %s on commit %s", workflow, commit_sha)
logger.debug("Getting the latest workflow run of %s on commit %s", workflow, commit_sha)

# Checking if the commit was created more than max_workflow_persist days ago.
# We avoid looking for workflow runs only if it's confirmed that the commit
Expand Down Expand Up @@ -365,20 +365,14 @@ def has_latest_run_passed(
workflow_id,
commit_sha,
repo_full_name,
branch_name,
commit_date,
branch_name=branch_name,
created_after=commit_date,
)

if not latest_run_data:
logger.info("Cannot find target workflow run with filtering.")
logger.info("Perform the workflow runs search without any filtering instead.")
latest_run_data = self.search_for_workflow_run(
workflow_id,
commit_sha,
repo_full_name,
"",
"",
)
latest_run_data = self.search_for_workflow_run(workflow_id, commit_sha, repo_full_name)

if not latest_run_data:
logger.info("Cannot find target workflow run after trying both search methods.")
Expand Down Expand Up @@ -528,8 +522,8 @@ def search_for_workflow_run(
workflow_id: str,
commit_sha: str,
full_name: str,
branch_name: str = "",
created_after: str = "",
branch_name: str | None = None,
created_after: str | None = None,
) -> dict:
"""Search for the target workflow run using GitHub API.
Expand All @@ -549,17 +543,17 @@ def search_for_workflow_run(
The digest of the commit the workflow run on.
full_name : str
The full name of the repository (e.g. ``owner/repo``).
branch_name : str
branch_name : str | None
The branch name used to filter workflow runs (only runs on this branch are considered).
created_after : str
created_after : str | None
Only look for workflow runs after this date (e.g. 2022-03-11T16:44:40Z).
Returns
-------
dict
The response data of the latest workflow run, or an empty dict if an error occurs.
"""
logger.info(
logger.debug(
"Search for workflow runs of %s with query params (branch=%s,created=%s)",
workflow_id,
branch_name,
Expand All @@ -568,7 +562,9 @@ def search_for_workflow_run(

# Get the first page of runs for this workflow.
query_page = 1
runs_data = self.api_client.get_workflow_runs(full_name, branch_name, created_after, query_page)
runs_data = self.api_client.get_workflow_runs(
full_name, branch_name=branch_name, created_after=created_after, page=query_page
)

while runs_data and query_page <= self.query_page_threshold:
logger.info(
Expand All @@ -589,7 +585,9 @@ def search_for_workflow_run(

# Query more items on the next result page of GitHub API.
query_page += 1
runs_data = self.api_client.get_workflow_runs(full_name, branch_name, created_after, query_page)
runs_data = self.api_client.get_workflow_runs(
full_name, branch_name=branch_name, created_after=created_after, page=query_page
)
except KeyError:
logger.error("Error while reading run data. Skipping ...")
continue
Expand Down
6 changes: 3 additions & 3 deletions src/macaron/slsa_analyzer/ci_service/gitlab_ci.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2022 - 2022, Oracle and/or its affiliates. All rights reserved.
# Copyright (c) 2022 - 2023, Oracle and/or its affiliates. All rights reserved.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.

"""This module analyzes GitLab CI."""
Expand Down Expand Up @@ -79,7 +79,7 @@ def extract_all_bash(self, callgraph: CallGraph, macaron_path: str = "") -> Iter
return []

def has_latest_run_passed(
self, repo_full_name: str, branch_name: str, commit_sha: str, commit_date: str, workflow: str
self, repo_full_name: str, branch_name: str | None, commit_sha: str, commit_date: str, workflow: str
) -> str:
"""Get the latest run of a workflow in the repository.
Expand All @@ -90,7 +90,7 @@ def has_latest_run_passed(
----------
repo_full_name : str
The target repo's full name.
branch_name : str
branch_name : str | None
The target branch.
commit_sha : str
The commit sha of the target repo.
Expand Down
6 changes: 3 additions & 3 deletions src/macaron/slsa_analyzer/ci_service/jenkins.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2022 - 2022, Oracle and/or its affiliates. All rights reserved.
# Copyright (c) 2022 - 2023, Oracle and/or its affiliates. All rights reserved.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.

"""This module analyzes Jenkins CI."""
Expand Down Expand Up @@ -79,7 +79,7 @@ def extract_all_bash(self, callgraph: CallGraph, macaron_path: str = "") -> Iter
return []

def has_latest_run_passed(
self, repo_full_name: str, branch_name: str, commit_sha: str, commit_date: str, workflow: str
self, repo_full_name: str, branch_name: str | None, commit_sha: str, commit_date: str, workflow: str
) -> str:
"""Get the latest run of a workflow in the repository.
Expand All @@ -90,7 +90,7 @@ def has_latest_run_passed(
----------
repo_full_name : str
The target repo's full name.
branch_name : str
branch_name : str | None
The target branch.
commit_sha : str
The commit sha of the target repo.
Expand Down
6 changes: 3 additions & 3 deletions src/macaron/slsa_analyzer/ci_service/travis.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2022 - 2022, Oracle and/or its affiliates. All rights reserved.
# Copyright (c) 2022 - 2023, Oracle and/or its affiliates. All rights reserved.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.

"""This module analyzes Travis CI."""
Expand Down Expand Up @@ -79,7 +79,7 @@ def extract_all_bash(self, callgraph: CallGraph, macaron_path: str = "") -> Iter
return []

def has_latest_run_passed(
self, repo_full_name: str, branch_name: str, commit_sha: str, commit_date: str, workflow: str
self, repo_full_name: str, branch_name: str | None, commit_sha: str, commit_date: str, workflow: str
) -> str:
"""Get the latest run of a workflow in the repository.
Expand All @@ -90,7 +90,7 @@ def has_latest_run_passed(
----------
repo_full_name : str
The target repo's full name.
branch_name : str
branch_name : str | None
The target branch.
commit_sha : str
The commit sha of the target repo.
Expand Down
6 changes: 4 additions & 2 deletions src/macaron/slsa_analyzer/git_service/api_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -204,7 +204,9 @@ def get_repo_workflow_data(self, full_name: str, workflow_name: str) -> dict:

return response_data

def get_workflow_runs(self, full_name: str, branch_name: str, created_after: str, page: int) -> dict:
def get_workflow_runs(
self, full_name: str, branch_name: str | None = None, created_after: str | None = None, page: int = 1
) -> dict:
"""Query the GitHub REST API for the data of all workflow runs of a repository.
The url would be in the following form:
Expand All @@ -218,7 +220,7 @@ def get_workflow_runs(self, full_name: str, branch_name: str, created_after: str
----------
full_name : str
The full name of the target repo in the form ``owner/repo``.
branch_name : str
branch_name : str | None
The name of the branch to look for workflow runs (e.g ``master``).
created_after : str | None
Only look for workflow runs after this date (e.g. ``2022-03-11T16:44:40Z``).
Expand Down
2 changes: 1 addition & 1 deletion tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -232,7 +232,7 @@ class MockGitHubActions(GitHubActions):
"""Mock the GitHubActions class."""

def has_latest_run_passed(
self, repo_full_name: str, branch_name: str, commit_sha: str, commit_date: str, workflow: str
self, repo_full_name: str, branch_name: str | None, commit_sha: str, commit_date: str, workflow: str
) -> str:
return "run_feedback"

Expand Down
2 changes: 1 addition & 1 deletion tests/slsa_analyzer/checks/test_build_service_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ class MockGitHubActions(GitHubActions):
"""Mock the GitHubActions class."""

def has_latest_run_passed(
self, repo_full_name: str, branch_name: str, commit_sha: str, commit_date: str, workflow: str
self, repo_full_name: str, branch_name: str | None, commit_sha: str, commit_date: str, workflow: str
) -> str:
return "run_feedback"

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ class MockGitHubActions(GitHubActions):
"""Mock the GitHubActions class."""

def has_latest_run_passed(
self, repo_full_name: str, branch_name: str, commit_sha: str, commit_date: str, workflow: str
self, repo_full_name: str, branch_name: str | None, commit_sha: str, commit_date: str, workflow: str
) -> str:
return "run_feedback"

Expand Down
2 changes: 1 addition & 1 deletion tests/slsa_analyzer/checks/test_provenance_l3_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ class MockGitHubActions(GitHubActions):
"""Mock the GitHubActions class."""

def has_latest_run_passed(
self, repo_full_name: str, branch_name: str, commit_sha: str, commit_date: str, workflow: str
self, repo_full_name: str, branch_name: str | None, commit_sha: str, commit_date: str, workflow: str
) -> str:
return "run_feedback"

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ class MockGitHubActions(GitHubActions):
"""Mock the GitHubActions class."""

def has_latest_run_passed(
self, repo_full_name: str, branch_name: str, commit_sha: str, commit_date: str, workflow: str
self, repo_full_name: str, branch_name: str | None, commit_sha: str, commit_date: str, workflow: str
) -> str:
return "run_feedback"

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ class MockGitHubActions(GitHubActions):
"""Mock the GitHubActions class."""

def has_latest_run_passed(
self, repo_full_name: str, branch_name: str, commit_sha: str, commit_date: str, workflow: str
self, repo_full_name: str, branch_name: str | None, commit_sha: str, commit_date: str, workflow: str
) -> str:
return "run_feedback"

Expand Down

0 comments on commit 1b7740b

Please sign in to comment.