diff --git a/.slugignore b/.slugignore
index 5082ea21..6a295170 100644
--- a/.slugignore
+++ b/.slugignore
@@ -2,4 +2,12 @@ cres/
 .devcontainer/
 docs/
 application/tests
-application/frontend/src/test/basic-e2e.test.ts
\ No newline at end of file
+application/frontend/src/test/basic-e2e.test.ts
+.github
+README.md
+LICENSE
+CONTRIBUTING.md
+.gitignore
+.dockerignore
+Dockerfile-dev
+docker-entrypoint.sh
\ No newline at end of file
diff --git a/Makefile b/Makefile
index 193ec538..dfc7e807 100644
--- a/Makefile
+++ b/Makefile
@@ -92,7 +92,12 @@ migrate-downgrade:
 
 import-all:
 	[ -d "./venv" ] && . ./venv/bin/activate
-	rm -rf standards_cache.sqlite && make migrate-upgrade && export FLASK_APP=$(CURDIR)/cre.py && python cre.py --add --from_spreadsheet https://docs.google.com/spreadsheets/d/1eZOEYgts7d_-Dr-1oAbogPfzBLh6511b58pX3b59kvg && python cre.py --generate_embeddings && python cre.py --zap_in --cheatsheets_in --github_tools_in --capec_in --owasp_secure_headers_in --pci_dss_4_in --juiceshop_in --dsomm_in && python cre.py --generate_embeddings
+	rm -rf standards_cache.sqlite &&\
+	make migrate-upgrade && export FLASK_APP=$(CURDIR)/cre.py &&\
+	python cre.py --add --from_spreadsheet https://docs.google.com/spreadsheets/d/1eZOEYgts7d_-Dr-1oAbogPfzBLh6511b58pX3b59kvg &&\
+	python cre.py --generate_embeddings && \
+	python cre.py --zap_in --cheatsheets_in --github_tools_in --capec_in --owasp_secure_headers_in --pci_dss_4_in --juiceshop_in --dsomm_in --cloud_native_security_controls_in &&\
+	python cre.py --generate_embeddings
 
 import-neo4j:
 	[ -d "./venv" ] && . ./venv/bin/activate
diff --git a/application/cmd/cre_main.py b/application/cmd/cre_main.py
index b3669db6..b1343f04 100644
--- a/application/cmd/cre_main.py
+++ b/application/cmd/cre_main.py
@@ -26,6 +26,7 @@
     pci_dss,
     juiceshop,
     dsomm,
+    cloud_native_security_controls,
 )
 from application.prompt_client import prompt_client as prompt_client
 from application.utils import gap_analysis
@@ -419,7 +420,10 @@ def run(args: argparse.Namespace) -> None:  # pragma: no cover
         dsomm.parse(
             cache=db_connect(args.cache_file),
         )
-
+    if args.cloud_native_security_controls_in:
+        cloud_native_security_controls.parse(
+            cache=db_connect(args.cache_file),
+        )
     if args.generate_embeddings:
         generate_embeddings(args.cache_file)
     if args.owasp_proj_meta:
diff --git a/application/utils/external_project_parsers/cloud_native_security_controls.py b/application/utils/external_project_parsers/cloud_native_security_controls.py
new file mode 100644
index 00000000..3605f12e
--- /dev/null
+++ b/application/utils/external_project_parsers/cloud_native_security_controls.py
@@ -0,0 +1,104 @@
+from io import StringIO
+import csv
+import urllib
+from pprint import pprint
+import logging
+import os
+from typing import Dict, Any
+from application.database import db
+from application.defs import cre_defs as defs
+import re
+from application.utils import spreadsheet as sheet_utils
+from application.prompt_client import prompt_client as prompt_client
+import requests
+
+logging.basicConfig()
+logger = logging.getLogger(__name__)
+logger.setLevel(logging.INFO)
+
+
+def parse(
+    cache: db.Node_collection,
+):
+    """Import the Cloud Native Security Controls catalog into ``cache``.
+
+    Downloads the controls catalog CSV and, for every row, stores a
+    ``Standard`` node plus the embeddings of its control title, then links
+    the node to the most similar CRE when one can be determined. Rows whose
+    node already exists with embeddings are skipped. Logs and returns early
+    if the download does not answer with HTTP 200.
+
+    Args:
+        cache: node collection queried for existing nodes and written to.
+    """
+    prompt = prompt_client.PromptHandler(cache)
+    resp = requests.get(
+        "https://raw.githubusercontent.com/cloud-native-security-controls/controls-catalog/main/controls/controls_catalog.csv"
+    )
+
+    if resp.status_code != 200:
+        logger.fatal(
+            f"could not retrieve cloud native security controls csv, status code {resp.status_code}"
+        )
+        return
+    entries = csv.DictReader(StringIO(resp.text), delimiter=",")
+    for entry in entries:
+        control_id = entry.get("ID")
+        try:
+            # csv yields strings, so convert before the arithmetic; the link
+            # targets line ID + 1 (presumably because line 1 is the header).
+            line_anchor = f"#L{int(control_id) + 1}"
+        except (TypeError, ValueError):
+            # Missing or non-numeric ID: link to the file without an anchor.
+            line_anchor = ""
+        cnsc = defs.Standard(
+            description=entry.get("Control Implementation"),
+            name="Cloud Native Security Controls",
+            section=entry.get("Section"),
+            sectionID=control_id,
+            subsection=entry.get("Control Title"),
+            hyperlink="https://github.com/cloud-native-security-controls/controls-catalog/blob/main/controls/controls_catalog.csv"
+            + line_anchor,
+            version=entry.get("Originating Document"),
+        )
+        existing = cache.get_nodes(
+            name=cnsc.name, section=cnsc.section, sectionID=cnsc.sectionID
+        )
+        if existing:
+            embeddings = cache.get_embeddings_for_doc(existing[0])
+            if embeddings:
+                logger.info(
+                    f"Node {cnsc.todict()} already exists and has embeddings, skipping"
+                )
+                continue
+        cnsc_embeddings = prompt.get_text_embeddings(cnsc.subsection)
+        cre_id = prompt.get_id_of_most_similar_cre(cnsc_embeddings)
+        if not cre_id:
+            logger.info(
+                f"could not find an appropriate CRE for Cloud Native Security Control {cnsc.section}, finding similarities with standards instead"
+            )
+            standard_id = prompt.get_id_of_most_similar_node(cnsc_embeddings)
+            dbstandard = cache.get_node_by_db_id(standard_id) if standard_id else None
+            if dbstandard:
+                logger.info(
+                    f"found an appropriate standard for Cloud Native Security Control {cnsc.section}:{cnsc.subsection}, it is: {dbstandard.name}:{dbstandard.section}"
+                )
+                cres = cache.find_cres_of_node(dbstandard)
+                if cres:
+                    cre_id = cres[0].id
+        # Only resolve a CRE when one of the two lookups above produced an id.
+        cre = cache.get_cre_by_db_id(cre_id) if cre_id else None
+        cnsc_copy = cnsc.shallow_copy()
+        cnsc_copy.description = ""
+        dbnode = cache.add_node(cnsc_copy)
+        if not dbnode:
+            logger.error(f"could not store database node {cnsc_copy!r}")
+            continue
+        cache.add_embedding(dbnode, cnsc_copy.doctype, cnsc_embeddings, repr(cnsc_copy))
+        if cre:
+            cache.add_link(db.dbCREfromCRE(cre), dbnode)
+            logger.info(f"successfully stored {cnsc_copy!r}")
+        else:
+            logger.info(
+                f"stored {cnsc_copy!r} but could not link it to any CRE reliably"
+            )
diff --git a/cre.py b/cre.py
index 7ae0d590..432748c9 100644
--- a/cre.py
+++ b/cre.py
@@ -184,7 +184,12 @@ def main() -> None:
     parser.add_argument(
         "--dsomm_in",
         action="store_true",
-        help="import dsomm from their repo",
+        help="import dsomm from their repo (https://raw.githubusercontent.com/devsecopsmaturitymodel/DevSecOps-MaturityModel-data/main/src/assets/YAML/generated/generated.yaml)",
+    )
+    parser.add_argument(
+        "--cloud_native_security_controls_in",
+        action="store_true",
+        help="import cloud native security controls from their repo (https://raw.githubusercontent.com/cloud-native-security-controls/controls-catalog/main/controls/controls_catalog.csv)",
     )
     parser.add_argument(
         "--generate_embeddings",