diff --git a/docs/source/assets/er-diagram.svg b/docs/source/assets/er-diagram.svg index 96ee2b72b..16925af54 100644 --- a/docs/source/assets/er-diagram.svg +++ b/docs/source/assets/er-diagram.svg @@ -4,11 +4,11 @@ - - + + %3 - + _analysis @@ -95,14 +95,14 @@ NOT NULL - + _component--_check_facts 0..N 1 - + _check_result _check_result @@ -124,14 +124,14 @@ NOT NULL - + _component--_check_result 0..N 1 - + _dependency _dependency @@ -145,21 +145,21 @@ NOT NULL - + _component--_dependency 1 1 - + _component--_dependency 1 1 - + _provenance _provenance @@ -187,14 +187,14 @@ NOT NULL - + _component--_provenance 0..N 1 - + _repository _repository @@ -205,7 +205,6 @@ branch_name [VARCHAR] - NOT NULL commit_date [VARCHAR] @@ -250,14 +249,14 @@ NOT NULL - + _component--_repository 0..N 1 - + _slsa_level _slsa_level @@ -275,14 +274,14 @@ NOT NULL - + _component--_slsa_level 1 1 - + _slsa_requirement _slsa_requirement @@ -306,7 +305,7 @@ [VARCHAR] - + _component--_slsa_requirement 0..N @@ -315,262 +314,308 @@ _build_as_code_check - -_build_as_code_check - -id - [INTEGER] - NOT NULL - -build_status_url - [VARCHAR] - -build_tool_name - [VARCHAR] - NOT NULL - -build_trigger - [VARCHAR] - -ci_service_name - [VARCHAR] - NOT NULL - -deploy_command - [VARCHAR] + +_build_as_code_check + +id + [INTEGER] + NOT NULL + +build_status_url + [VARCHAR] + +build_tool_name + [VARCHAR] + NOT NULL + +build_trigger + [VARCHAR] + +ci_service_name + [VARCHAR] + NOT NULL + +deploy_command + [VARCHAR] _check_facts--_build_as_code_check - -1 -1 + +1 +1 _build_script_check - -_build_script_check - -id - [INTEGER] - NOT NULL - -build_tool_name - [VARCHAR] - NOT NULL + +_build_script_check + +id + [INTEGER] + NOT NULL + +build_tool_name + [VARCHAR] + NOT NULL _check_facts--_build_script_check - -1 -1 + +1 +1 _build_service_check - -_build_service_check - -id - [INTEGER] - NOT NULL - -build_command - [VARCHAR] - -build_status_url - [VARCHAR] - -build_tool_name - [VARCHAR] - NOT NULL - -build_trigger - [VARCHAR] - -ci_service_name - [VARCHAR] - NOT NULL + +_build_service_check + +id + [INTEGER] + NOT NULL + +build_command + [VARCHAR] + +build_status_url + [VARCHAR] + +build_tool_name + [VARCHAR] + NOT NULL + +build_trigger + [VARCHAR] + +ci_service_name + [VARCHAR] + NOT NULL _check_facts--_build_service_check - -1 -1 + +1 +1 _expectation - -_expectation - -id - [INTEGER] - NOT NULL - -description - [VARCHAR] - NOT NULL - -expectation_type - [VARCHAR] - NOT NULL - -path - [VARCHAR] - NOT NULL - -sha - [VARCHAR] - -target - [VARCHAR] - NOT NULL - -text - [VARCHAR] + +_expectation + +id + [INTEGER] + NOT NULL + +description + [VARCHAR] + NOT NULL + +expectation_type + [VARCHAR] + NOT NULL + +path + [VARCHAR] + NOT NULL + +sha + [VARCHAR] + +target + [VARCHAR] + NOT NULL + +text + [VARCHAR] _check_facts--_expectation - -1 -1 + +1 +1 - + +_infer_artifact_pipeline_check + +_infer_artifact_pipeline_check + +id + [INTEGER] + NOT NULL + +deploy_job + [VARCHAR] + NOT NULL + +deploy_step + [VARCHAR] + NOT NULL + +run_url + [VARCHAR] + NOT NULL + + + +_check_facts--_infer_artifact_pipeline_check + +1 +1 + + + _provenance_available_check - -_provenance_available_check - -id - [INTEGER] - NOT NULL - -asset_name - [VARCHAR] - NOT NULL - -asset_url - [VARCHAR] + +_provenance_available_check + +id + [INTEGER] + NOT NULL + +asset_name + [VARCHAR] + NOT NULL + +asset_url + [VARCHAR] - + _check_facts--_provenance_available_check - -1 -1 + +1 +1 - + _provenance_l3_check - -_provenance_l3_check - -id - [INTEGER] - NOT NULL + +_provenance_l3_check + +id + [INTEGER] + NOT NULL - + _check_facts--_provenance_l3_check - -1 -1 + +1 +1 + + + +_provenance_witness_l1_check + +_provenance_witness_l1_check + +id + [INTEGER] + NOT NULL + + + +_check_facts--_provenance_witness_l1_check + +1 +1 - + _trusted_builder_check - -_trusted_builder_check - -id - [INTEGER] - NOT NULL - -build_tool_name - [VARCHAR] - NOT NULL - -build_trigger - [VARCHAR] - -ci_service_name - [VARCHAR] - NOT NULL + +_trusted_builder_check + +id + [INTEGER] + NOT NULL + +build_tool_name + [VARCHAR] + NOT NULL + +build_trigger + [VARCHAR] + +ci_service_name + [VARCHAR] + NOT NULL - + _check_facts--_trusted_builder_check - -1 -1 + +1 +1 - + _check_result--_check_facts 0..N 1 - + _release_artifact - -_release_artifact - -id - [INTEGER] - NOT NULL - -name - [VARCHAR] - NOT NULL - -provenance_id - [INTEGER] - -slsa_verified - [BOOLEAN] + +_release_artifact + +id + [INTEGER] + NOT NULL + +name + [VARCHAR] + NOT NULL + +provenance_id + [INTEGER] + +slsa_verified + [BOOLEAN] - + _provenance--_release_artifact - -0..N -{0,1} + +0..N +{0,1} - + _hash_digest - -_hash_digest - -id - [INTEGER] - NOT NULL - -artifact_id - [INTEGER] - NOT NULL - -digest - [VARCHAR] - NOT NULL - -digest_algorithm - [VARCHAR] - NOT NULL + +_hash_digest + +id + [INTEGER] + NOT NULL + +artifact_id + [INTEGER] + NOT NULL + +digest + [VARCHAR] + NOT NULL + +digest_algorithm + [VARCHAR] + NOT NULL - + _release_artifact--_hash_digest - -0..N -1 + +0..N +1 diff --git a/src/macaron/database/table_definitions.py b/src/macaron/database/table_definitions.py index 901920a56..d3c0a2284 100644 --- a/src/macaron/database/table_definitions.py +++ b/src/macaron/database/table_definitions.py @@ -273,7 +273,7 @@ class Repository(ORMBase): remote_path: Mapped[str] = mapped_column(String, nullable=False) #: The branch name. - branch_name: Mapped[str] = mapped_column(String, nullable=False) + branch_name: Mapped[str | None] = mapped_column(String, nullable=True) #: The release tag. release_tag: Mapped[str] = mapped_column(String, nullable=True) diff --git a/src/macaron/slsa_analyzer/analyzer.py b/src/macaron/slsa_analyzer/analyzer.py index 2005f2af1..4550ec924 100644 --- a/src/macaron/slsa_analyzer/analyzer.py +++ b/src/macaron/slsa_analyzer/analyzer.py @@ -308,7 +308,7 @@ def run_single( context=analyze_ctx, ) - def add_repository(self, branch_name: str, git_obj: Git) -> Repository | None: + def add_repository(self, branch_name: str | None, git_obj: Git) -> Repository | None: """Create a repository instance for a target repository. The repository instances are transient objects for SQLAlchemy, which may be @@ -316,7 +316,7 @@ def add_repository(self, branch_name: str, git_obj: Git) -> Repository | None: Parameters ---------- - branch_name : str + branch_name : str | None The name of the branch that we are analyzing. We need this because when the target repository is in a detached state, the current branch name cannot be determined. @@ -342,7 +342,7 @@ def add_repository(self, branch_name: str, git_obj: Git) -> Repository | None: logger.info("The complete name of this repository is %s", complete_name) - res_branch = "" + res_branch = None if branch_name: res_branch = branch_name @@ -353,9 +353,8 @@ def add_repository(self, branch_name: str, git_obj: Git) -> Repository | None: # HEAD is a detached symbolic reference. This happens when we checkout a commit. # However, it shouldn't happen as we don't allow specifying a commit digest without # a branch in the config. - logger.critical("The HEAD of the repo does not point to any branch.") - logger.error(err) - res_branch = "" + logger.debug("The HEAD of the repo does not point to any branch: %s.", err) + res_branch = None # Get the head commit. # This is the commit that Macaron will run the analysis on. diff --git a/src/macaron/slsa_analyzer/ci_service/base_ci_service.py b/src/macaron/slsa_analyzer/ci_service/base_ci_service.py index b87b1eb77..ec9e0ad41 100644 --- a/src/macaron/slsa_analyzer/ci_service/base_ci_service.py +++ b/src/macaron/slsa_analyzer/ci_service/base_ci_service.py @@ -169,7 +169,7 @@ def has_kws_in_config(self, kws: list, repo_path: str) -> tuple[str, str]: return "", "" def has_latest_run_passed( - self, repo_full_name: str, branch_name: str, commit_sha: str, commit_date: str, workflow: str + self, repo_full_name: str, branch_name: str | None, commit_sha: str, commit_date: str, workflow: str ) -> str: """Get the latest run of a workflow in the repository. @@ -180,7 +180,7 @@ def has_latest_run_passed( ---------- repo_full_name : str The target repo's full name. - branch_name : str + branch_name : str | None The target branch. commit_sha : str The commit sha of the target repo. @@ -297,7 +297,7 @@ def extract_all_bash(self, callgraph: CallGraph, macaron_path: str = "") -> Iter return [] def has_latest_run_passed( - self, repo_full_name: str, branch_name: str, commit_sha: str, commit_date: str, workflow: str + self, repo_full_name: str, branch_name: str | None, commit_sha: str, commit_date: str, workflow: str ) -> str: """Get the latest run of a workflow in the repository. @@ -308,7 +308,7 @@ def has_latest_run_passed( ---------- repo_full_name : str The target repo's full name. - branch_name : str + branch_name : str | None The target branch. commit_sha : str The commit sha of the target repo. diff --git a/src/macaron/slsa_analyzer/ci_service/circleci.py b/src/macaron/slsa_analyzer/ci_service/circleci.py index 491a3df76..f0a8bf754 100644 --- a/src/macaron/slsa_analyzer/ci_service/circleci.py +++ b/src/macaron/slsa_analyzer/ci_service/circleci.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022 - 2022, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2022 - 2023, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module analyze Circle CI.""" @@ -79,7 +79,7 @@ def extract_all_bash(self, callgraph: CallGraph, macaron_path: str = "") -> Iter return [] def has_latest_run_passed( - self, repo_full_name: str, branch_name: str, commit_sha: str, commit_date: str, workflow: str + self, repo_full_name: str, branch_name: str | None, commit_sha: str, commit_date: str, workflow: str ) -> str: """Get the latest run of a workflow in the repository. @@ -90,7 +90,7 @@ def has_latest_run_passed( ---------- repo_full_name : str The target repo's full name. - branch_name : str + branch_name : str | None The target branch. commit_sha : str The commit sha of the target repo. diff --git a/src/macaron/slsa_analyzer/ci_service/github_actions.py b/src/macaron/slsa_analyzer/ci_service/github_actions.py index 4e63a4ef2..cb5d2b071 100644 --- a/src/macaron/slsa_analyzer/ci_service/github_actions.py +++ b/src/macaron/slsa_analyzer/ci_service/github_actions.py @@ -294,7 +294,7 @@ def extract_all_bash(self, callgraph: CallGraph, macaron_path: str = "") -> Iter ) def has_latest_run_passed( - self, repo_full_name: str, branch_name: str, commit_sha: str, commit_date: str, workflow: str + self, repo_full_name: str, branch_name: str | None, commit_sha: str, commit_date: str, workflow: str ) -> str: """Check if the latest run of ``workflow`` on commit ``commit_sha`` is passing. @@ -306,7 +306,7 @@ def has_latest_run_passed( ---------- repo_full_name : str The target repo's full name. - branch_name : str + branch_name : str | None The target branch. commit_sha : str The commit sha of the target repo. @@ -320,7 +320,7 @@ def has_latest_run_passed( str The URL for the passing workflow run, or empty if no passing GitHub Action build workflow is found. """ - logger.info("Getting the latest workflow run of %s on commit %s", workflow, commit_sha) + logger.debug("Getting the latest workflow run of %s on commit %s", workflow, commit_sha) # Checking if the commit was created more than max_workflow_persist days ago. # We only avoid looking for workflow runs if only it's confirmed that the commit @@ -365,20 +365,14 @@ def has_latest_run_passed( workflow_id, commit_sha, repo_full_name, - branch_name, - commit_date, + branch_name=branch_name, + created_after=commit_date, ) if not latest_run_data: logger.info("Cannot find target workflow run with filtering.") logger.info("Perform the workflow runs search without any filtering instead.") - latest_run_data = self.search_for_workflow_run( - workflow_id, - commit_sha, - repo_full_name, - "", - "", - ) + latest_run_data = self.search_for_workflow_run(workflow_id, commit_sha, repo_full_name) if not latest_run_data: logger.info("Cannot find target workflow run after trying both search methods.") @@ -528,8 +522,8 @@ def search_for_workflow_run( workflow_id: str, commit_sha: str, full_name: str, - branch_name: str = "", - created_after: str = "", + branch_name: str | None = None, + created_after: str | None = None, ) -> dict: """Search for the target workflow run using GitHub API. @@ -549,9 +543,9 @@ def search_for_workflow_run( The digest of the commit the workflow run on. full_name : str The full name of the repository (e.g. ``owner/repo``). - branch_name : str + branch_name : str | None The branch name to filter out workflow runs. - created_after : str + created_after : str | None Only look for workflow runs after this date (e.g. 2022-03-11T16:44:40Z). Returns @@ -559,7 +553,7 @@ def search_for_workflow_run( dict The response data of the latest workflow run or an empty dict if error. """ - logger.info( + logger.debug( "Search for workflow runs of %s with query params (branch=%s,created=%s)", workflow_id, branch_name, @@ -568,7 +562,9 @@ def search_for_workflow_run( # Get the first page of runs for this workflow. query_page = 1 - runs_data = self.api_client.get_workflow_runs(full_name, branch_name, created_after, query_page) + runs_data = self.api_client.get_workflow_runs( + full_name, branch_name=branch_name, created_after=created_after, page=query_page + ) while runs_data and query_page <= self.query_page_threshold: logger.info( @@ -589,7 +585,9 @@ def search_for_workflow_run( # Query more items on the next result page of GitHub API. query_page += 1 - runs_data = self.api_client.get_workflow_runs(full_name, branch_name, created_after, query_page) + runs_data = self.api_client.get_workflow_runs( + full_name, branch_name=branch_name, created_after=created_after, page=query_page + ) except KeyError: logger.error("Error while reading run data. Skipping ...") continue diff --git a/src/macaron/slsa_analyzer/ci_service/gitlab_ci.py b/src/macaron/slsa_analyzer/ci_service/gitlab_ci.py index 90ec7e68b..d8060aa02 100644 --- a/src/macaron/slsa_analyzer/ci_service/gitlab_ci.py +++ b/src/macaron/slsa_analyzer/ci_service/gitlab_ci.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022 - 2022, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2022 - 2023, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module analyzes GitLab CI.""" @@ -79,7 +79,7 @@ def extract_all_bash(self, callgraph: CallGraph, macaron_path: str = "") -> Iter return [] def has_latest_run_passed( - self, repo_full_name: str, branch_name: str, commit_sha: str, commit_date: str, workflow: str + self, repo_full_name: str, branch_name: str | None, commit_sha: str, commit_date: str, workflow: str ) -> str: """Get the latest run of a workflow in the repository. @@ -90,7 +90,7 @@ def has_latest_run_passed( ---------- repo_full_name : str The target repo's full name. - branch_name : str + branch_name : str | None The target branch. commit_sha : str The commit sha of the target repo. diff --git a/src/macaron/slsa_analyzer/ci_service/jenkins.py b/src/macaron/slsa_analyzer/ci_service/jenkins.py index 3a2076843..0abc8d21e 100644 --- a/src/macaron/slsa_analyzer/ci_service/jenkins.py +++ b/src/macaron/slsa_analyzer/ci_service/jenkins.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022 - 2022, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2022 - 2023, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module analyzes Jenkins CI.""" @@ -79,7 +79,7 @@ def extract_all_bash(self, callgraph: CallGraph, macaron_path: str = "") -> Iter return [] def has_latest_run_passed( - self, repo_full_name: str, branch_name: str, commit_sha: str, commit_date: str, workflow: str + self, repo_full_name: str, branch_name: str | None, commit_sha: str, commit_date: str, workflow: str ) -> str: """Get the latest run of a workflow in the repository. @@ -90,7 +90,7 @@ def has_latest_run_passed( ---------- repo_full_name : str The target repo's full name. - branch_name : str + branch_name : str | None The target branch. commit_sha : str The commit sha of the target repo. diff --git a/src/macaron/slsa_analyzer/ci_service/travis.py b/src/macaron/slsa_analyzer/ci_service/travis.py index b953b9978..499842c23 100644 --- a/src/macaron/slsa_analyzer/ci_service/travis.py +++ b/src/macaron/slsa_analyzer/ci_service/travis.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022 - 2022, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2022 - 2023, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module analyzes Travis CI.""" @@ -79,7 +79,7 @@ def extract_all_bash(self, callgraph: CallGraph, macaron_path: str = "") -> Iter return [] def has_latest_run_passed( - self, repo_full_name: str, branch_name: str, commit_sha: str, commit_date: str, workflow: str + self, repo_full_name: str, branch_name: str | None, commit_sha: str, commit_date: str, workflow: str ) -> str: """Get the latest run of a workflow in the repository. @@ -90,7 +90,7 @@ def has_latest_run_passed( ---------- repo_full_name : str The target repo's full name. - branch_name : str + branch_name : str | None The target branch. commit_sha : str The commit sha of the target repo. diff --git a/src/macaron/slsa_analyzer/git_service/api_client.py b/src/macaron/slsa_analyzer/git_service/api_client.py index f3e921486..f431e33e2 100644 --- a/src/macaron/slsa_analyzer/git_service/api_client.py +++ b/src/macaron/slsa_analyzer/git_service/api_client.py @@ -204,7 +204,9 @@ def get_repo_workflow_data(self, full_name: str, workflow_name: str) -> dict: return response_data - def get_workflow_runs(self, full_name: str, branch_name: str, created_after: str, page: int) -> dict: + def get_workflow_runs( + self, full_name: str, branch_name: str | None = None, created_after: str | None = None, page: int = 1 + ) -> dict: """Query the GitHub REST API for the data of all workflow run of a repository. The url would be in the following form: @@ -218,7 +220,7 @@ def get_workflow_runs(self, full_name: str, branch_name: str, created_after: str ---------- full_name : str The full name of the target repo in the form ``owner/repo``. - branch_name : str + branch_name : str | None The name of the branch to look for workflow runs (e.g ``master``). created_after : str Only look for workflow runs after this date (e.g. ``2022-03-11T16:44:40Z``). diff --git a/tests/conftest.py b/tests/conftest.py index 82193fc14..c82414a88 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -232,7 +232,7 @@ class MockGitHubActions(GitHubActions): """Mock the GitHubActions class.""" def has_latest_run_passed( - self, repo_full_name: str, branch_name: str, commit_sha: str, commit_date: str, workflow: str + self, repo_full_name: str, branch_name: str | None, commit_sha: str, commit_date: str, workflow: str ) -> str: return "run_feedback" diff --git a/tests/slsa_analyzer/checks/test_build_service_check.py b/tests/slsa_analyzer/checks/test_build_service_check.py index c633f01d5..39211bcf1 100644 --- a/tests/slsa_analyzer/checks/test_build_service_check.py +++ b/tests/slsa_analyzer/checks/test_build_service_check.py @@ -26,7 +26,7 @@ class MockGitHubActions(GitHubActions): """Mock the GitHubActions class.""" def has_latest_run_passed( - self, repo_full_name: str, branch_name: str, commit_sha: str, commit_date: str, workflow: str + self, repo_full_name: str, branch_name: str | None, commit_sha: str, commit_date: str, workflow: str ) -> str: return "run_feedback" diff --git a/tests/slsa_analyzer/checks/test_provenance_available_check.py b/tests/slsa_analyzer/checks/test_provenance_available_check.py index 59bcb83a6..a610e0368 100644 --- a/tests/slsa_analyzer/checks/test_provenance_available_check.py +++ b/tests/slsa_analyzer/checks/test_provenance_available_check.py @@ -26,7 +26,7 @@ class MockGitHubActions(GitHubActions): """Mock the GitHubActions class.""" def has_latest_run_passed( - self, repo_full_name: str, branch_name: str, commit_sha: str, commit_date: str, workflow: str + self, repo_full_name: str, branch_name: str | None, commit_sha: str, commit_date: str, workflow: str ) -> str: return "run_feedback" diff --git a/tests/slsa_analyzer/checks/test_provenance_l3_check.py b/tests/slsa_analyzer/checks/test_provenance_l3_check.py index de8cb528a..ecae41283 100644 --- a/tests/slsa_analyzer/checks/test_provenance_l3_check.py +++ b/tests/slsa_analyzer/checks/test_provenance_l3_check.py @@ -23,7 +23,7 @@ class MockGitHubActions(GitHubActions): """Mock the GitHubActions class.""" def has_latest_run_passed( - self, repo_full_name: str, branch_name: str, commit_sha: str, commit_date: str, workflow: str + self, repo_full_name: str, branch_name: str | None, commit_sha: str, commit_date: str, workflow: str ) -> str: return "run_feedback" diff --git a/tests/slsa_analyzer/checks/test_provenance_l3_content_check.py b/tests/slsa_analyzer/checks/test_provenance_l3_content_check.py index a49cdf3f4..d155acc8c 100644 --- a/tests/slsa_analyzer/checks/test_provenance_l3_content_check.py +++ b/tests/slsa_analyzer/checks/test_provenance_l3_content_check.py @@ -26,7 +26,7 @@ class MockGitHubActions(GitHubActions): """Mock the GitHubActions class.""" def has_latest_run_passed( - self, repo_full_name: str, branch_name: str, commit_sha: str, commit_date: str, workflow: str + self, repo_full_name: str, branch_name: str | None, commit_sha: str, commit_date: str, workflow: str ) -> str: return "run_feedback" diff --git a/tests/slsa_analyzer/checks/test_trusted_builder_l3_check.py b/tests/slsa_analyzer/checks/test_trusted_builder_l3_check.py index ac33ad4b2..c82eae450 100644 --- a/tests/slsa_analyzer/checks/test_trusted_builder_l3_check.py +++ b/tests/slsa_analyzer/checks/test_trusted_builder_l3_check.py @@ -20,7 +20,7 @@ class MockGitHubActions(GitHubActions): """Mock the GitHubActions class.""" def has_latest_run_passed( - self, repo_full_name: str, branch_name: str, commit_sha: str, commit_date: str, workflow: str + self, repo_full_name: str, branch_name: str | None, commit_sha: str, commit_date: str, workflow: str ) -> str: return "run_feedback"