diff --git a/application/cmd/cre_main.py b/application/cmd/cre_main.py index c911ec8f..39512b78 100644 --- a/application/cmd/cre_main.py +++ b/application/cmd/cre_main.py @@ -225,6 +225,7 @@ def register_standard( standard_entries: List[defs.Standard], collection: db.Node_collection, generate_embeddings=True, + calculate_gap_analysis=True, db_connection_str: str = "", ): if os.environ.get("CRE_NO_GEN_EMBEDDINGS"): @@ -265,7 +266,7 @@ def register_standard( # calculate gap analysis jobs = [] pending_stadards = collection.standards() - if not os.environ.get("CRE_NO_CALCULATE_GAP_ANALYSIS"): + if calculate_gap_analysis and not os.environ.get("CRE_NO_CALCULATE_GAP_ANALYSIS"): for standard_name in pending_stadards: if standard_name == importing_name: continue diff --git a/application/tests/capec_parser_test.py b/application/tests/capec_parser_test.py index 6a01289d..d548b596 100644 --- a/application/tests/capec_parser_test.py +++ b/application/tests/capec_parser_test.py @@ -61,7 +61,7 @@ class fakeRequest: version="3.7", ), ] - for name, nodes in entries.items(): + for name, nodes in entries.results.items(): self.assertEqual(name, capec_parser.Capec().name) self.assertEqual(len(nodes), 2) self.assertCountEqual(nodes[0].todict(), expected[0].todict()) diff --git a/application/tests/ccmv4_parser_test.py b/application/tests/ccmv4_parser_test.py index a8144daa..410acf4e 100644 --- a/application/tests/ccmv4_parser_test.py +++ b/application/tests/ccmv4_parser_test.py @@ -71,7 +71,7 @@ # version="v4.0", # ), # ] -# for name, nodes in entries.items(): +# for name, nodes in entries.results.items(): # self.assertEqual(name, ccmv4.CloudControlsMatrix().name) # self.assertEqual(len(nodes), 2) # self.assertCountEqual(nodes[0].todict(), expected[0].todict()) diff --git a/application/tests/cheatsheets_parser_test.py b/application/tests/cheatsheets_parser_test.py index e08e6d16..bd27aa13 100644 --- a/application/tests/cheatsheets_parser_test.py +++ b/application/tests/cheatsheets_parser_test.py @@ -47,7 +47,7 @@ class Repo: links=[defs.Link(document=cre, ltype=defs.LinkTypes.LinkedTo)], ) self.maxDiff = None - for name, nodes in entries.items(): + for name, nodes in entries.results.items(): self.assertEqual(name, cheatsheets_parser.Cheatsheets().name) self.assertEqual(len(nodes), 1) self.assertCountEqual(expected.todict(), nodes[0].todict()) diff --git a/application/tests/cloud_native_security_controls_parser_test.py b/application/tests/cloud_native_security_controls_parser_test.py index 54600935..63a63c53 100644 --- a/application/tests/cloud_native_security_controls_parser_test.py +++ b/application/tests/cloud_native_security_controls_parser_test.py @@ -87,7 +87,7 @@ class fakeRequest: version="CNSWP v1.0", ), ] - for name, nodes in entries.items(): + for name, nodes in entries.results.items(): self.assertEqual( name, cloud_native_security_controls.CloudNativeSecurityControls().name ) diff --git a/application/tests/cwe_parser_test.py b/application/tests/cwe_parser_test.py index d6f8cc16..6e30d8d0 100644 --- a/application/tests/cwe_parser_test.py +++ b/application/tests/cwe_parser_test.py @@ -84,7 +84,7 @@ def iter_content(self, chunk_size=None): ], ), ] - for name, nodes in entries.items(): + for name, nodes in entries.results.items(): self.assertEqual(name, cwe.CWE().name) self.assertEqual(len(nodes), 2) self.assertCountEqual(nodes[0].todict(), expected[0].todict()) diff --git a/application/tests/dsomm_parser_test.py b/application/tests/dsomm_parser_test.py index 38cdfc98..cc37d64e 100644 --- a/application/tests/dsomm_parser_test.py +++ b/application/tests/dsomm_parser_test.py @@ -96,7 +96,7 @@ class fakeRequest: subsection="Defined build process", ), ] - for name, nodes in entries.items(): + for name, nodes in entries.results.items(): self.assertEqual(name, dsomm.DSOMM().name) self.assertEqual(len(nodes), 2) self.assertCountEqual(nodes[0].todict(), expected[0].todict()) diff --git a/application/tests/juiceshop_test.py b/application/tests/juiceshop_test.py index 0cdb16ed..46c3c241 100644 --- a/application/tests/juiceshop_test.py +++ b/application/tests/juiceshop_test.py @@ -84,7 +84,7 @@ class fakeRequest: tooltype=defs.ToolTypes.Training, ), ] - for name, nodes in entries.items(): + for name, nodes in entries.results.items(): self.assertEqual(name, juiceshop.JuiceShop().name) self.assertEqual(len(nodes), 2) self.assertCountEqual(nodes[0].todict(), expected[0].todict()) diff --git a/application/tests/misc_tools_parser_test.py b/application/tests/misc_tools_parser_test.py index c7dc45d7..24f71cee 100644 --- a/application/tests/misc_tools_parser_test.py +++ b/application/tests/misc_tools_parser_test.py @@ -62,7 +62,7 @@ def test_document_todict( entries = misc_tools_parser.MiscTools().parse( cache=collection, ph=PromptHandler(database=self.collection) ) - for name, tools in entries.items(): + for name, tools in entries.results.items(): self.assertEqual(name, "OWASP WrongSecrets") self.assertEqual(len(tools), 1) self.assertCountEqual(expected.todict(), tools[0].todict()) diff --git a/application/tests/secure_headers_parser_test.py b/application/tests/secure_headers_parser_test.py index 0b85d481..cc1742b9 100644 --- a/application/tests/secure_headers_parser_test.py +++ b/application/tests/secure_headers_parser_test.py @@ -47,7 +47,7 @@ class Repo: section="headerAsection", links=[defs.Link(document=cre, ltype=defs.LinkTypes.LinkedTo)], ) - for name, nodes in entries.items(): + for name, nodes in entries.results.items(): self.assertEqual(name, secure_headers.SecureHeaders().name) self.maxDiff = None diff --git a/application/tests/zap_alerts_parser_test.py b/application/tests/zap_alerts_parser_test.py index c7664f8e..b0cb3482 100644 --- a/application/tests/zap_alerts_parser_test.py +++ b/application/tests/zap_alerts_parser_test.py @@ -106,7 +106,7 @@ def test_register_zap_alert_top_10_tags(self, mock_git) -> None: cache=self.collection, ph=prompt_client.PromptHandler(database=self.collection), ) - for name, nodes in entries.items(): + for name, nodes in entries.results.items(): self.assertEqual(name, zap_alerts_parser.ZAP().name) expected = defs.Tool( name="ZAP Rule", @@ -205,7 +205,7 @@ def test_register_zap_alert_cwe(self, mock_git) -> None: ], ) self.maxDiff = None - for name, nodes in entries.items(): + for name, nodes in entries.results.items(): self.assertEqual(name, zap_alerts_parser.ZAP().name) self.assertEqual(len(nodes), 1) self.assertCountEqual(expected.todict(), nodes[0].todict()) diff --git a/application/utils/external_project_parsers/base_parser.py b/application/utils/external_project_parsers/base_parser.py index 856e39ec..54f724d4 100644 --- a/application/utils/external_project_parsers/base_parser.py +++ b/application/utils/external_project_parsers/base_parser.py @@ -1,8 +1,6 @@ -from application.database import db -from application.defs import cre_defs as defs +from application.utils.external_project_parsers.base_parser_defs import base_parser_defs from rq import Queue from application.utils import redis -from typing import List, Dict, Optional from application.prompt_client import prompt_client as prompt_client import logging import time @@ -15,32 +13,12 @@ logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) -# abstract class/interface that shows how to import a project that is not cre or its core resources - - -class ParserInterface(object): - # The name of the resource being parsed - name: str - - def parse( - database: db.Node_collection, - prompt_client: Optional[prompt_client.PromptHandler], - ) -> Dict[str, List[defs.Document]]: - """ - Parses the resources of a project, - links the resource of the project to CREs - this can be done either using glue resources, AI or any other supported method - then calls cre_main.register_node - Returns a dict with a key of the resource for importing and a value of list of documents with CRE links, optionally with their embeddings filled in - """ - raise NotImplementedError - class BaseParser: @classmethod def register_resource( self, - sclass: ParserInterface, + sclass: base_parser_defs.ParserInterface, db_connection_str: str, ): from application.cmd import cre_main @@ -58,10 +36,15 @@ def register_resource( ) return - result = sclass_instance.parse(db, ph) + resultObj = sclass_instance.parse(db, ph) try: - for _, documents in result.items(): - cre_main.register_standard(documents, db) + for _, documents in resultObj.results.items(): + cre_main.register_standard( + standard_entries=documents, + db_connection_str=db, + calculate_gap_analysis=resultObj.calculate_gap_analysis, + generate_embeddings=resultObj.calculate_embeddings, + ) except ValueError as ve: err_str = f"error importing {sclass.name}, err: {ve}" raise ValueError(err_str) @@ -79,7 +62,7 @@ def call_importers(self, db_connection_str: str): if os.environ.get("CRE_IMPORTERS_IMPORT_ONLY"): import_only = json.loads(os.environ.get("CRE_IMPORTERS_IMPORT_ONLY")) - for subclass in ParserInterface.__subclasses__(): + for subclass in base_parser_defs.ParserInterface.__subclasses__(): if import_only and subclass.name not in import_only: continue diff --git a/application/utils/external_project_parsers/parsers/capec_parser.py b/application/utils/external_project_parsers/parsers/capec_parser.py index 79b799a4..0da07b02 100644 --- a/application/utils/external_project_parsers/parsers/capec_parser.py +++ b/application/utils/external_project_parsers/parsers/capec_parser.py @@ -11,7 +11,10 @@ logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) -from application.utils.external_project_parsers.base_parser import ParserInterface +from application.utils.external_project_parsers.base_parser_defs import ( + ParserInterface, + ParseResult, +) from application.prompt_client import prompt_client as prompt_client @@ -22,7 +25,11 @@ class Capec(ParserInterface): def parse(self, cache: db.Node_collection, ph: prompt_client.PromptHandler): xml = requests.get(self.capec_xml) if xml.status_code == 200: - return {self.name: self.register_capec(xml_contents=xml.text, cache=cache)} + return ParseResult( + results={ + self.name: self.register_capec(xml_contents=xml.text, cache=cache) + } + ) else: logger.fatal(f"Could not get CAPEC's XML data, error was {xml.text}") diff --git a/application/utils/external_project_parsers/parsers/ccmv4.py b/application/utils/external_project_parsers/parsers/ccmv4.py index 217f06bc..5aeb20bb 100644 --- a/application/utils/external_project_parsers/parsers/ccmv4.py +++ b/application/utils/external_project_parsers/parsers/ccmv4.py @@ -9,7 +9,10 @@ logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) -from application.utils.external_project_parsers.base_parser import ParserInterface +from application.utils.external_project_parsers.base_parser_defs import ( + ParserInterface, + ParseResult, +) from application.prompt_client import prompt_client as prompt_client from application.utils import spreadsheet as sheet_utils diff --git a/application/utils/external_project_parsers/parsers/cheatsheets_parser.py b/application/utils/external_project_parsers/parsers/cheatsheets_parser.py index c3b78981..2eb93335 100644 --- a/application/utils/external_project_parsers/parsers/cheatsheets_parser.py +++ b/application/utils/external_project_parsers/parsers/cheatsheets_parser.py @@ -5,7 +5,10 @@ from application.defs import cre_defs as defs import os import re -from application.utils.external_project_parsers.base_parser import ParserInterface +from application.utils.external_project_parsers.base_parser_defs import ( + ParserInterface, + ParseResult, +) from application.prompt_client import prompt_client as prompt_client @@ -29,7 +32,7 @@ def parse(self, cache: db.Node_collection, ph: prompt_client.PromptHandler): cheatsheets = self.register_cheatsheets( repo=repo, cache=cache, cheatsheets_path=cheatsheets_path, repo_path=c_repo ) - return {self.name: cheatsheets} + return ParseResult(results={self.name: cheatsheets}) def register_cheatsheets( self, cache: db.Node_collection, repo, cheatsheets_path, repo_path diff --git a/application/utils/external_project_parsers/parsers/cloud_native_security_controls.py b/application/utils/external_project_parsers/parsers/cloud_native_security_controls.py index f636c4cf..1d492fa5 100644 --- a/application/utils/external_project_parsers/parsers/cloud_native_security_controls.py +++ b/application/utils/external_project_parsers/parsers/cloud_native_security_controls.py @@ -4,7 +4,10 @@ from application.database import db from application.defs import cre_defs as defs from application.prompt_client import prompt_client as prompt_client -from application.utils.external_project_parsers.base_parser import ParserInterface +from application.utils.external_project_parsers.base_parser_defs import ( + ParserInterface, + ParseResult, +) import requests logging.basicConfig() @@ -79,4 +82,4 @@ def parse(self, cache: db.Node_collection, ph: prompt_client.PromptHandler): f"stored {cnsc.__repr__()} but could not link it to any CRE reliably" ) standard_entries.append(cnsc) - return {self.name: standard_entries} + return ParseResult(results={self.name: standard_entries}) diff --git a/application/utils/external_project_parsers/parsers/cwe.py b/application/utils/external_project_parsers/parsers/cwe.py index 68c33200..b0821aba 100644 --- a/application/utils/external_project_parsers/parsers/cwe.py +++ b/application/utils/external_project_parsers/parsers/cwe.py @@ -7,8 +7,11 @@ from application.defs import cre_defs as defs import shutil import xmltodict -from application.utils.external_project_parsers.base_parser import ParserInterface from application.prompt_client import prompt_client +from application.utils.external_project_parsers.base_parser_defs import ( + ParserInterface, + ParseResult, +) logging.basicConfig() logger = logging.getLogger(__name__) @@ -33,11 +36,14 @@ def parse(self, cache: db.Node_collection, ph: prompt_client.PromptHandler): for _, _, files in os.walk(tmp_dir, topdown=False): for file in files: if file.startswith("cwe") and file.endswith(".xml"): - return { - self.name: self.register_cwe( - xml_file=os.path.join(tmp_dir, file), cache=cache - ), - } + return ParseResult( + results={ + self.name: self.register_cwe( + xml_file=os.path.join(tmp_dir, file), cache=cache + ), + }, + calculate_gap_analysis=False, + ) raise RuntimeError("there is no file named cwe.xml in the target zip") def make_hyperlink(self, cwe_id: int): diff --git a/application/utils/external_project_parsers/parsers/dsomm.py b/application/utils/external_project_parsers/parsers/dsomm.py index 0f4c5c36..48c57e29 100644 --- a/application/utils/external_project_parsers/parsers/dsomm.py +++ b/application/utils/external_project_parsers/parsers/dsomm.py @@ -4,8 +4,11 @@ from application.database import db from application.defs import cre_defs as defs from application.prompt_client import prompt_client as prompt_client -from application.utils.external_project_parsers.base_parser import ParserInterface import requests +from application.utils.external_project_parsers.base_parser_defs import ( + ParserInterface, + ParseResult, +) logging.basicConfig() logger = logging.getLogger(__name__) @@ -150,4 +153,4 @@ def parse( # use iso as glue standard = self.link_to_iso(aname, activity, cache, standard) standard_entries.append(standard) - return {self.name: standard_entries} + return ParseResult(results={self.name: standard_entries}) diff --git a/application/utils/external_project_parsers/parsers/iso27001.py b/application/utils/external_project_parsers/parsers/iso27001.py index 1e1ff115..8a3fc2eb 100644 --- a/application/utils/external_project_parsers/parsers/iso27001.py +++ b/application/utils/external_project_parsers/parsers/iso27001.py @@ -9,8 +9,11 @@ from simplify_docx import simplify import docx import tempfile -from application.utils.external_project_parsers.base_parser import ParserInterface from application.prompt_client import prompt_client as prompt_client +from application.utils.external_project_parsers.base_parser_defs import ( + ParserInterface, + ParseResult, +) from typing import List logging.basicConfig() @@ -77,9 +80,9 @@ class ISO27001(ParserInterface): # return nist_table def parse(self, cache: db.Node_collection, ph: prompt_client.PromptHandler): - return { - self.name: [] - } # the doc above does not have names we get the names from the spreadsheet for now, disable + return ParseResult( + results={self.name: []} + ) # the doc above does not have names we get the names from the spreadsheet for now, disable # url = self.url # documents: List[defs.Standard] = [] # nist_nodes = cache.get_nodes(name="NIST 800-53 v5") diff --git a/application/utils/external_project_parsers/parsers/juiceshop.py b/application/utils/external_project_parsers/parsers/juiceshop.py index 7757aa50..43225569 100644 --- a/application/utils/external_project_parsers/parsers/juiceshop.py +++ b/application/utils/external_project_parsers/parsers/juiceshop.py @@ -7,7 +7,10 @@ from application.defs import cre_defs as defs import re from application.prompt_client import prompt_client as prompt_client -from application.utils.external_project_parsers.base_parser import ParserInterface +from application.utils.external_project_parsers.base_parser_defs import ( + ParserInterface, + ParseResult, +) import requests logging.basicConfig() @@ -94,4 +97,4 @@ def parse( f"stored {chal.section} but could not link it to any CRE reliably" ) chals.append(chal) - return {self.name: chals} + return ParseResult(results={self.name: chals}) diff --git a/application/utils/external_project_parsers/parsers/misc_tools_parser.py b/application/utils/external_project_parsers/parsers/misc_tools_parser.py index 7a326cc2..e4333b5f 100644 --- a/application/utils/external_project_parsers/parsers/misc_tools_parser.py +++ b/application/utils/external_project_parsers/parsers/misc_tools_parser.py @@ -10,7 +10,10 @@ from application.defs import cre_defs as defs from application.utils import git from application.prompt_client import prompt_client as prompt_client -from application.utils.external_project_parsers.base_parser import ParserInterface +from application.utils.external_project_parsers.base_parser_defs import ( + ParserInterface, + ParseResult, +) import requests logging.basicConfig() @@ -31,7 +34,7 @@ def parse(self, cache: db.Node_collection, ph: prompt_client.PromptHandler): for url in self.tool_urls: tool_entries = self.parse_tool(cache=cache, tool_repo=url) tools[tool_entries[0].name] = tool_entries - return tools + return ParseResult(results=tools) def parse_tool( self, tool_repo: str, cache: db.Node_collection, dry_run: boolean = False diff --git a/application/utils/external_project_parsers/parsers/pci_dss.py b/application/utils/external_project_parsers/parsers/pci_dss.py index b1a90f56..bafc2091 100644 --- a/application/utils/external_project_parsers/parsers/pci_dss.py +++ b/application/utils/external_project_parsers/parsers/pci_dss.py @@ -7,7 +7,10 @@ import re from application.utils import spreadsheet as sheet_utils from application.prompt_client import prompt_client as prompt_client -from application.utils.external_project_parsers.base_parser import ParserInterface +from application.utils.external_project_parsers.base_parser_defs import ( + ParserInterface, + ParseResult, +) logging.basicConfig() logger = logging.getLogger(__name__) @@ -26,7 +29,7 @@ def parse(self, cache: db.Node_collection, ph: prompt_client.PromptHandler): ), cache=cache, ) - return {self.name: entries} + return ParseResult(results={self.name: entries}) def __parse( self, diff --git a/application/utils/external_project_parsers/parsers/secure_headers.py b/application/utils/external_project_parsers/parsers/secure_headers.py index 526006db..5f4aafc6 100644 --- a/application/utils/external_project_parsers/parsers/secure_headers.py +++ b/application/utils/external_project_parsers/parsers/secure_headers.py @@ -7,7 +7,10 @@ import os import re from urllib.parse import urlparse, parse_qs -from application.utils.external_project_parsers.base_parser import ParserInterface +from application.utils.external_project_parsers.base_parser_defs import ( + ParserInterface, + ParseResult, +) from application.prompt_client import prompt_client as prompt_client # GENERIC Markdown file parser for self-contained links! when we have more projects using this setup add them in the list @@ -31,7 +34,7 @@ def parse(self, cache: db.Node_collection, ph: prompt_client.PromptHandler): entries = self.register_headers( repo=repo, cache=cache, file_path=file_path, repo_path=sh_repo ) - return {self.name: entries} + return ParseResult(results={self.name: entries}) def register_headers(self, cache: db.Node_collection, repo, file_path, repo_path): cre_link = r"\[([\w\s\d]+)\]\((?P((?:\/|https:\/\/)(www\.)?opencre\.org/cre/(?P\d+-\d+)\?[\w\d\.\/\=\#\+\&\%\-]+))\)" diff --git a/application/utils/external_project_parsers/parsers/zap_alerts_parser.py b/application/utils/external_project_parsers/parsers/zap_alerts_parser.py index fad57c38..85488b45 100644 --- a/application/utils/external_project_parsers/parsers/zap_alerts_parser.py +++ b/application/utils/external_project_parsers/parsers/zap_alerts_parser.py @@ -13,8 +13,11 @@ logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) -from application.utils.external_project_parsers.base_parser import ParserInterface from application.prompt_client import prompt_client as prompt_client +from application.utils.external_project_parsers.base_parser_defs import ( + ParserInterface, + ParseResult, +) class ZAP(ParserInterface): @@ -48,7 +51,7 @@ def parse( zaproxy_website = "https://github.com/zaproxy/zaproxy-website.git" repo = git.clone(zaproxy_website) alerts = self.__register_alerts(repo=repo, cache=cache) - return {self.name: alerts} + return ParseResult(results={self.name: alerts}) def __link_to_top10( self, alert: defs.Tool, top10: re.Match[str] | None, cache: db.Node_collection