From cc87efe5e131730775dd92db8cde45395cab93c2 Mon Sep 17 00:00:00 2001 From: Spyros Date: Wed, 6 Sep 2023 20:40:42 +0100 Subject: [PATCH 01/75] trigger first staging build --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index e46c683a8..6ccf6a916 100644 --- a/README.md +++ b/README.md @@ -84,4 +84,4 @@ Please see [Contributing](CONTRIBUTING.md) for contributing instructions Roadmap --- -For a roadmap of what we would like to be done please see the [issues](https://github.com/OWASP/common-requirement-enumeration/issues). +For a roadmap of what we would like to be done please see the [issues](https://github.com/OWASP/common-requirement-enumeration/issues). \ No newline at end of file From 322e5dd2e3ab499ce2ffa329805725f8a8449a9b Mon Sep 17 00:00:00 2001 From: john681611 Date: Wed, 2 Aug 2023 16:11:15 +0100 Subject: [PATCH 02/75] Inital Hack of NEO4j DB creation --- application/database/db.py | 47 +++++++++++++++++++++++ requirements.txt | 76 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 123 insertions(+) diff --git a/application/database/db.py b/application/database/db.py index 1c3c1a3af..988cebedb 100644 --- a/application/database/db.py +++ b/application/database/db.py @@ -1,3 +1,4 @@ +from neo4j import GraphDatabase from sqlalchemy.orm import aliased import os import logging @@ -179,6 +180,13 @@ def add_node(self, *args, **kwargs): @classmethod def add_cre(cls, dbcre: CRE, graph: nx.DiGraph) -> nx.DiGraph: if dbcre: + Neo4j_driver.execute_query( + "MERGE (n:CRE {id: $nid, name: $name, description: $description, external_id: $external_id})", + nid=dbcre.id, + name=dbcre.name, + description=dbcre.description, + external_id=dbcre.external_id, + database_="neo4j") graph.add_node( f"CRE: {dbcre.id}", internal_id=dbcre.id, external_id=dbcre.external_id ) @@ -189,6 +197,21 @@ def add_cre(cls, dbcre: CRE, graph: nx.DiGraph) -> nx.DiGraph: @classmethod def add_dbnode(cls, dbnode: Node, graph: nx.DiGraph) -> 
nx.DiGraph: if dbnode: + Neo4j_driver.execute_query( + "MERGE (n:Node {id: $nid, name: $name, section: $section, section_id: $section_id, subsection: $subsection, tags: $tags, version: $version, description: $description, ntype: $ntype})", + nid=dbnode.id, + name=dbnode.name, + section=dbnode.section, + section_id=dbnode.section_id, + subsection=dbnode.subsection or "", + tags=dbnode.tags, + version=dbnode.version or "", + description=dbnode.description, + ntype=dbnode.ntype, + database_="neo4j") + + # coma separated tags + graph.add_node( "Node: " + str(dbnode.id), internal_id=dbnode.id, @@ -215,6 +238,16 @@ def load_cre_graph(cls, session) -> nx.Graph: graph = cls.add_cre(dbcre=cre, graph=graph) graph.add_edge(f"CRE: {il.group}", f"CRE: {il.cre}", ltype=il.type) + Neo4j_driver.execute_query( + "MATCH (a:CRE), (b:CRE) " + "WHERE a.id = $aID AND b.id = $bID " + "CALL apoc.create.relationship(a,$relType, {},b) " + "YIELD rel " + "RETURN rel", + aID=il.group, + bID=il.cre, + relType=str.upper(il.type).replace(' ', '_'), + database_="neo4j") for lnk in session.query(Links).all(): node = session.query(Node).filter(Node.id == lnk.node).first() @@ -226,6 +259,16 @@ def load_cre_graph(cls, session) -> nx.Graph: graph = cls.add_cre(dbcre=cre, graph=graph) graph.add_edge(f"CRE: {lnk.cre}", f"Node: {str(lnk.node)}", ltype=lnk.type) + Neo4j_driver.execute_query( + "MATCH (a:CRE), (b:Node) " + "WHERE a.id = $aID AND b.id = $bID " + "CALL apoc.create.relationship(a,$relType, {},b) " + "YIELD rel " + "RETURN rel", + aID=lnk.cre, + bID=lnk.node, + relType=str.upper(lnk.type).replace(' ', '_'), + database_="neo4j") return graph @@ -1427,3 +1470,7 @@ def dbCREfromCRE(cre: cre_defs.CRE) -> CRE: external_id=cre.id, tags=",".join(tags), ) + +URI = "neo4j://localhost:7687" +AUTH = ("neo4j", "password") +Neo4j_driver = GraphDatabase.driver(URI, auth=AUTH) \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 7eb93e3db..025dbc184 100644 --- a/requirements.txt 
+++ b/requirements.txt @@ -29,5 +29,81 @@ semver setuptools==66.1.1 simplify_docx==0.1.2 SQLAlchemy==2.0.20 +compliance-trestle +nose==1.3.7 +numpy==1.23.0 +neo4j==5.11.0 +openapi-schema-validator==0.3.4 +openapi-spec-validator==0.5.1 +openpyxl==3.1.0 +orderedmultidict==1.0.1 +orjson==3.8.5 +packaging +paramiko==3.0.0 +pathable==0.4.3 +pathspec==0.9.0 +pbr==5.8.0 +pep517==0.8.2 +Pillow==9.1.1 +pip-autoremove==0.9.1 +platformdirs==2.2.0 +playwright==1.33.0 +pluggy==1.0.0 +prance +prompt-toolkit==3.0.19 +proto-plus==1.22.2 +protobuf==4.23.1 +psycopg2==2.9.1 +pyasn1==0.4.8 +pyasn1-modules==0.2.8 +pycodestyle==2.7.0 +pycparser==2.21 +pydantic==1.10.4 +pyee==9.0.4 +pyflakes==2.3.1 +PyGithub==1.53 +PyJWT==1.7.1 +PyNaCl==1.5.0 +pyparsing==2.4.6 +pyrsistent==0.17.3 +PySnooper==1.1.1 +pytest==7.3.1 +pytest-base-url==2.0.0 +pytest-playwright==0.3.3 +python-dateutil==2.8.1 +python-docx==0.8.11 +python-dotenv==0.21.1 +python-frontmatter==1.0.0 +python-markdown-maker==1.0 +python-slugify==8.0.1 +PyYAML==5.3.1 +regex==2021.11.10 +requests==2.27.1 +requests-oauthlib==1.3.1 +rfc3986==1.5.0 +rsa==4.7 +ruamel.yaml==0.17.21 +ruamel.yaml.clib==0.2.7 +scikit-learn==1.2.2 +Shapely==1.8.5.post1 +simplify-docx==0.1.2 +six==1.15.0 +smmap==3.0.4 +sniffio==1.3.0 +soupsieve==2.4.1 +SQLAlchemy==1.3.23 +sqlalchemy-stubs==0.4 +testresources==2.0.1 +text-unidecode==1.3 +threadpoolctl==3.1.0 +toml==0.10.2 +tomli==1.2.2 +tqdm==4.65.0 +typed-ast==1.5.4 +types-PyYAML==5.4.8 +typing-inspect==0.7.1 +typing_extensions==4.4.0 +untangle==1.1.1 +urllib3==1.26.8 vertexai==0.0.1 xmltodict==0.13.0 From 9a7b213e75a5804933b752d50d1b7836820cf9cf Mon Sep 17 00:00:00 2001 From: john681611 Date: Thu, 10 Aug 2023 13:50:10 +0100 Subject: [PATCH 03/75] Added: Neo4j docker run --- Makefile | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Makefile b/Makefile index 1e9f86ac7..ef43d81e7 100644 --- a/Makefile +++ b/Makefile @@ -45,6 +45,9 @@ docker: docker-run: docker run -it -p 5000:5000 opencre:$(shell git rev-parse 
HEAD) +docker-neo4j: + docker run --env NEO4J_PLUGINS='["apoc"]' --volume=/Users/johnharvey/neo4j/data:/data --volume=/data --volume=/logs --workdir=/var/lib/neo4j -p 7474:7474 -p 7687:7687 -d neo4j + lint: [ -d "./venv" ] && . ./venv/bin/activate && black . && yarn lint From d67922df160bd353086ad0a357c5deba943ed0d0 Mon Sep 17 00:00:00 2001 From: john681611 Date: Thu, 10 Aug 2023 13:50:20 +0100 Subject: [PATCH 04/75] Added NEO_DB Class --- application/database/db.py | 144 +++++++++++++++++++++++++------------ 1 file changed, 98 insertions(+), 46 deletions(-) diff --git a/application/database/db.py b/application/database/db.py index 988cebedb..f601bc14c 100644 --- a/application/database/db.py +++ b/application/database/db.py @@ -1,4 +1,6 @@ + from neo4j import GraphDatabase +import neo4j from sqlalchemy.orm import aliased import os import logging @@ -157,14 +159,102 @@ class Embeddings(BaseModel): # type: ignore ) + +class NEO_DB: + __instance = None + + driver = None + connected = False + @classmethod + def instance(self): + if self.__instance is None: + self.__instance = self.__new__(self) + + URI = os.getenv('NEO4J_URI') or "neo4j://localhost:7687" + AUTH = (os.getenv('NEO4J_USR') or "neo4j", os.getenv('NEO4J_PASS') or "password") + self.driver = GraphDatabase.driver(URI, auth=AUTH) + + try: + self.driver.verify_connectivity() + self.connected = True + except neo4j.exceptions.ServiceUnavailable: + logger.error("NEO4J ServiceUnavailable error - disabling neo4j related features") + + return self.__instance + + def __init__(sel): + raise ValueError("NEO_DB is a singleton, please call instance() instead") + + @classmethod + def add_cre(self, dbcre: CRE): + if not self.connected: + return + self.driver.execute_query( + "MERGE (n:CRE {id: $nid, name: $name, description: $description, external_id: $external_id})", + nid=dbcre.id, + name=dbcre.name, + description=dbcre.description, + external_id=dbcre.external_id, + database_="neo4j") + + @classmethod + def 
add_dbnode(self, dbnode: Node): + if not self.connected: + return + self.driver.execute_query( + "MERGE (n:Node {id: $nid, name: $name, section: $section, section_id: $section_id, subsection: $subsection, tags: $tags, version: $version, description: $description, ntype: $ntype})", + nid=dbnode.id, + name=dbnode.name, + section=dbnode.section, + section_id=dbnode.section_id, + subsection=dbnode.subsection or "", + tags=dbnode.tags, + version=dbnode.version or "", + description=dbnode.description, + ntype=dbnode.ntype, + database_="neo4j") + + @classmethod + def link_CRE_to_CRE(self, id1, id2, link_type): + if not self.connected: + return + self.driver.execute_query( + "MATCH (a:CRE), (b:CRE) " + "WHERE a.id = $aID AND b.id = $bID " + "CALL apoc.create.relationship(a,$relType, {},b) " + "YIELD rel " + "RETURN rel", + aID=id1, + bID=id2, + relType=str.upper(link_type).replace(' ', '_'), + database_="neo4j") + + @classmethod + def link_CRE_to_Node(self, CRE_id, node_id, link_type): + if not self.connected: + return + self.driver.execute_query( + "MATCH (a:CRE), (b:Node) " + "WHERE a.id = $aID AND b.id = $bID " + "CALL apoc.create.relationship(a,$relType, {},b) " + "YIELD rel " + "RETURN rel", + aID=CRE_id, + bID=node_id, + relType=str.upper(link_type).replace(' ', '_'), + database_="neo4j") + + class CRE_Graph: graph: nx.Graph = None + neo_db: NEO_DB = None __instance = None @classmethod - def instance(cls, session): + def instance(cls, session, neo_db: NEO_DB): if cls.__instance is None: cls.__instance = cls.__new__(cls) + cls.neo_db = neo_db cls.graph = cls.load_cre_graph(session) return cls.__instance @@ -180,13 +270,7 @@ def add_node(self, *args, **kwargs): @classmethod def add_cre(cls, dbcre: CRE, graph: nx.DiGraph) -> nx.DiGraph: if dbcre: - Neo4j_driver.execute_query( - "MERGE (n:CRE {id: $nid, name: $name, description: $description, external_id: $external_id})", - nid=dbcre.id, - name=dbcre.name, - description=dbcre.description, - external_id=dbcre.external_id, 
- database_="neo4j") + cls.neo_db.add_cre(dbcre) graph.add_node( f"CRE: {dbcre.id}", internal_id=dbcre.id, external_id=dbcre.external_id ) @@ -197,19 +281,7 @@ def add_cre(cls, dbcre: CRE, graph: nx.DiGraph) -> nx.DiGraph: @classmethod def add_dbnode(cls, dbnode: Node, graph: nx.DiGraph) -> nx.DiGraph: if dbnode: - Neo4j_driver.execute_query( - "MERGE (n:Node {id: $nid, name: $name, section: $section, section_id: $section_id, subsection: $subsection, tags: $tags, version: $version, description: $description, ntype: $ntype})", - nid=dbnode.id, - name=dbnode.name, - section=dbnode.section, - section_id=dbnode.section_id, - subsection=dbnode.subsection or "", - tags=dbnode.tags, - version=dbnode.version or "", - description=dbnode.description, - ntype=dbnode.ntype, - database_="neo4j") - + cls.neo_db.add_dbnode(dbnode) # coma separated tags graph.add_node( @@ -238,16 +310,7 @@ def load_cre_graph(cls, session) -> nx.Graph: graph = cls.add_cre(dbcre=cre, graph=graph) graph.add_edge(f"CRE: {il.group}", f"CRE: {il.cre}", ltype=il.type) - Neo4j_driver.execute_query( - "MATCH (a:CRE), (b:CRE) " - "WHERE a.id = $aID AND b.id = $bID " - "CALL apoc.create.relationship(a,$relType, {},b) " - "YIELD rel " - "RETURN rel", - aID=il.group, - bID=il.cre, - relType=str.upper(il.type).replace(' ', '_'), - database_="neo4j") + cls.neo_db.link_CRE_to_CRE(il.group, il.cre, il.type) for lnk in session.query(Links).all(): node = session.query(Node).filter(Node.id == lnk.node).first() @@ -259,26 +322,19 @@ def load_cre_graph(cls, session) -> nx.Graph: graph = cls.add_cre(dbcre=cre, graph=graph) graph.add_edge(f"CRE: {lnk.cre}", f"Node: {str(lnk.node)}", ltype=lnk.type) - Neo4j_driver.execute_query( - "MATCH (a:CRE), (b:Node) " - "WHERE a.id = $aID AND b.id = $bID " - "CALL apoc.create.relationship(a,$relType, {},b) " - "YIELD rel " - "RETURN rel", - aID=lnk.cre, - bID=lnk.node, - relType=str.upper(lnk.type).replace(' ', '_'), - database_="neo4j") + cls.neo_db.link_CRE_to_Node(lnk.cre, 
lnk.node, lnk.type) return graph class Node_collection: graph: nx.Graph = None + neo_db: NEO_DB = None session = sqla.session def __init__(self) -> None: if not os.environ.get("NO_LOAD_GRAPH"): - self.graph = CRE_Graph.instance(sqla.session) + self.neo_db = NEO_DB.instance() + self.graph = CRE_Graph.instance(sqla.session, self.neo_db) self.session = sqla.session def __get_external_links(self) -> List[Tuple[CRE, Node, str]]: @@ -1470,7 +1526,3 @@ def dbCREfromCRE(cre: cre_defs.CRE) -> CRE: external_id=cre.id, tags=",".join(tags), ) - -URI = "neo4j://localhost:7687" -AUTH = ("neo4j", "password") -Neo4j_driver = GraphDatabase.driver(URI, auth=AUTH) \ No newline at end of file From 914152d39fddd9d9ad8da47bd407f9df5cd31def Mon Sep 17 00:00:00 2001 From: john681611 Date: Thu, 10 Aug 2023 15:26:16 +0100 Subject: [PATCH 05/75] Create Inital Path API response --- application/database/db.py | 99 +++++++++++++++++++++++++++++-------- application/web/web_main.py | 14 ++++-- 2 files changed, 88 insertions(+), 25 deletions(-) diff --git a/application/database/db.py b/application/database/db.py index f601bc14c..f10b19c90 100644 --- a/application/database/db.py +++ b/application/database/db.py @@ -243,7 +243,64 @@ def link_CRE_to_Node(self, CRE_id, node_id, link_type): bID=node_id, relType=str.upper(link_type).replace(' ', '_'), database_="neo4j") + @classmethod + def gap_analysis(self, name_1, name_2): + if not self.connected: + return + records, _, _ = self.driver.execute_query( + "MATCH" + "(BaseStandard:Node {name: $name1}), " + "(CompareStandard:Node {name: $name2}), " + "p = shortestPath((BaseStandard)-[*]-(CompareStandard)) " + "WHERE length(p) > 1 AND ALL(n in NODES(p) WHERE n:CRE or n = BaseStandard or n = CompareStandard) " + "RETURN p ", + name1=name_1, + name2=name_2, + database_="neo4j" + ) + def format_segment(seg): + return { + "start": { + "name": seg.start_node["name"], + "sectionID": seg.start_node["section_id"], + "section": seg.start_node["section"], + 
"subsection": seg.start_node["subsection"], + "description": seg.start_node["description"], + "id": seg.start_node["id"] + }, + "end": { + "name": seg.end_node["name"], + "sectionID": seg.end_node["section_id"], + "section": seg.end_node["section"], + "subsection": seg.end_node["subsection"], + "description": seg.end_node["description"], + "id": seg.end_node["id"] + }, + "relationship": seg.type + } + + def format_record(rec): + return { + "start": { + "name": rec.start_node["name"], + "sectionID": rec.start_node["section_id"], + "section": rec.start_node["section"], + "subsection": rec.start_node["subsection"], + "description": rec.start_node["description"], + "id": rec.start_node["id"] + }, + "end": { + "name": rec.end_node["name"], + "sectionID": rec.end_node["section_id"], + "section": rec.end_node["section"], + "subsection": rec.end_node["subsection"], + "description": rec.end_node["description"], + "id": rec.end_node["id"] + }, + "path": [format_segment(seg) for seg in rec.relationships] + } + return [format_record(rec['p']) for rec in records] class CRE_Graph: graph: nx.Graph = None @@ -255,7 +312,7 @@ def instance(cls, session, neo_db: NEO_DB): if cls.__instance is None: cls.__instance = cls.__new__(cls) cls.neo_db = neo_db - cls.graph = cls.load_cre_graph(session) + # cls.graph = cls.load_cre_graph(session) return cls.__instance def __init__(sel): @@ -1158,30 +1215,30 @@ def find_path_between_nodes( return res - def gap_analysis(self, node_names: List[str]) -> List[cre_defs.Node]: + def gap_analysis(self, node_names: List[str]): """Since the CRE structure is a tree-like graph with leaves being nodes we can find the paths between nodes find_path_between_nodes() is a graph-path-finding method """ - processed_nodes = [] - dbnodes: List[Node] = [] - for name in node_names: - dbnodes.extend(self.session.query(Node).filter(Node.name == name).all()) - - for node in dbnodes: - working_node = nodeFromDB(node) - for other_node in dbnodes: - if node.id == 
other_node.id: - continue - if self.find_path_between_nodes(node.id, other_node.id): - working_node.add_link( - cre_defs.Link( - ltype=cre_defs.LinkTypes.LinkedTo, - document=nodeFromDB(other_node), - ) - ) - processed_nodes.append(working_node) - return processed_nodes + # processed_nodes = [] + # dbnodes: List[Node] = [] + # for name in node_names: + # dbnodes.extend(self.session.query(Node).filter(Node.name == name).all()) + + # for node in dbnodes: + # working_node = nodeFromDB(node) + # for other_node in dbnodes: + # if node.id == other_node.id: + # continue + # if self.find_path_between_nodes(node.id, other_node.id): + # working_node.add_link( + # cre_defs.Link( + # ltype=cre_defs.LinkTypes.LinkedTo, + # document=nodeFromDB(other_node), + # ) + # ) + # processed_nodes.append(working_node) + return self.neo_db.gap_analysis(node_names[0], node_names[1]) def text_search(self, text: str) -> List[Optional[cre_defs.Document]]: """Given a piece of text, tries to find the best match diff --git a/application/web/web_main.py b/application/web/web_main.py index a3671797d..2675900a3 100644 --- a/application/web/web_main.py +++ b/application/web/web_main.py @@ -208,10 +208,16 @@ def find_document_by_tag() -> Any: def gap_analysis() -> Any: # TODO (spyros): add export result to spreadsheet database = db.Node_collection() standards = request.args.getlist("standard") - documents = database.gap_analysis(standards=standards) - if documents: - res = [doc.todict() for doc in documents] - return jsonify(res) + paths = database.gap_analysis(standards) + grouped_paths = {} + for path in paths: + key = path['start']['id'] + if key not in grouped_paths: + grouped_paths[key] = {"start": path['start'], "paths": []} + del path['start'] + grouped_paths[key]['paths'].append(path) + + return jsonify(grouped_paths) @app.route("/rest/v1/text_search", methods=["GET"]) From 89383ced58ad9a3a7797356950af5a52ba024920 Mon Sep 17 00:00:00 2001 From: john681611 Date: Fri, 11 Aug 2023 17:22:06 +0100 
Subject: [PATCH 06/75] Build basic UI for testing --- application/database/db.py | 129 +++++++++-------- application/frontend/src/const.ts | 1 + .../src/pages/GapAnalysis/GapAnalysis.tsx | 133 ++++++++++++++++++ application/frontend/src/routes.tsx | 20 ++- application/web/web_main.py | 10 +- 5 files changed, 228 insertions(+), 65 deletions(-) create mode 100644 application/frontend/src/pages/GapAnalysis/GapAnalysis.tsx diff --git a/application/database/db.py b/application/database/db.py index f10b19c90..f52f66960 100644 --- a/application/database/db.py +++ b/application/database/db.py @@ -1,4 +1,3 @@ - from neo4j import GraphDatabase import neo4j from sqlalchemy.orm import aliased @@ -159,90 +158,100 @@ class Embeddings(BaseModel): # type: ignore ) - class NEO_DB: __instance = None - + driver = None connected = False + @classmethod def instance(self): if self.__instance is None: self.__instance = self.__new__(self) - URI = os.getenv('NEO4J_URI') or "neo4j://localhost:7687" - AUTH = (os.getenv('NEO4J_USR') or "neo4j", os.getenv('NEO4J_PASS') or "password") + URI = os.getenv("NEO4J_URI") or "neo4j://localhost:7687" + AUTH = ( + os.getenv("NEO4J_USR") or "neo4j", + os.getenv("NEO4J_PASS") or "password", + ) self.driver = GraphDatabase.driver(URI, auth=AUTH) try: self.driver.verify_connectivity() self.connected = True - except neo4j.exceptions.ServiceUnavailable: - logger.error("NEO4J ServiceUnavailable error - disabling neo4j related features") - + except neo4j.exceptions.ServiceUnavailable: + logger.error( + "NEO4J ServiceUnavailable error - disabling neo4j related features" + ) + return self.__instance def __init__(sel): raise ValueError("NEO_DB is a singleton, please call instance() instead") - + @classmethod def add_cre(self, dbcre: CRE): - if not self.connected: + if not self.connected: return - self.driver.execute_query( - "MERGE (n:CRE {id: $nid, name: $name, description: $description, external_id: $external_id})", - nid=dbcre.id, - name=dbcre.name, - 
description=dbcre.description, - external_id=dbcre.external_id, - database_="neo4j") - + self.driver.execute_query( + "MERGE (n:CRE {id: $nid, name: $name, description: $description, external_id: $external_id})", + nid=dbcre.id, + name=dbcre.name, + description=dbcre.description, + external_id=dbcre.external_id, + database_="neo4j", + ) + @classmethod def add_dbnode(self, dbnode: Node): if not self.connected: return self.driver.execute_query( - "MERGE (n:Node {id: $nid, name: $name, section: $section, section_id: $section_id, subsection: $subsection, tags: $tags, version: $version, description: $description, ntype: $ntype})", - nid=dbnode.id, - name=dbnode.name, - section=dbnode.section, - section_id=dbnode.section_id, - subsection=dbnode.subsection or "", - tags=dbnode.tags, - version=dbnode.version or "", - description=dbnode.description, - ntype=dbnode.ntype, - database_="neo4j") - + "MERGE (n:Node {id: $nid, name: $name, section: $section, section_id: $section_id, subsection: $subsection, tags: $tags, version: $version, description: $description, ntype: $ntype})", + nid=dbnode.id, + name=dbnode.name, + section=dbnode.section, + section_id=dbnode.section_id, + subsection=dbnode.subsection or "", + tags=dbnode.tags, + version=dbnode.version or "", + description=dbnode.description, + ntype=dbnode.ntype, + database_="neo4j", + ) + @classmethod def link_CRE_to_CRE(self, id1, id2, link_type): if not self.connected: return self.driver.execute_query( - "MATCH (a:CRE), (b:CRE) " - "WHERE a.id = $aID AND b.id = $bID " - "CALL apoc.create.relationship(a,$relType, {},b) " - "YIELD rel " - "RETURN rel", - aID=id1, - bID=id2, - relType=str.upper(link_type).replace(' ', '_'), - database_="neo4j") - + "MATCH (a:CRE), (b:CRE) " + "WHERE a.id = $aID AND b.id = $bID " + "CALL apoc.create.relationship(a,$relType, {},b) " + "YIELD rel " + "RETURN rel", + aID=id1, + bID=id2, + relType=str.upper(link_type).replace(" ", "_"), + database_="neo4j", + ) + @classmethod def 
link_CRE_to_Node(self, CRE_id, node_id, link_type): if not self.connected: return self.driver.execute_query( - "MATCH (a:CRE), (b:Node) " - "WHERE a.id = $aID AND b.id = $bID " - "CALL apoc.create.relationship(a,$relType, {},b) " - "YIELD rel " - "RETURN rel", - aID=CRE_id, - bID=node_id, - relType=str.upper(link_type).replace(' ', '_'), - database_="neo4j") + "MATCH (a:CRE), (b:Node) " + "WHERE a.id = $aID AND b.id = $bID " + "CALL apoc.create.relationship(a,$relType, {},b) " + "YIELD rel " + "RETURN rel", + aID=CRE_id, + bID=node_id, + relType=str.upper(link_type).replace(" ", "_"), + database_="neo4j", + ) + @classmethod def gap_analysis(self, name_1, name_2): if not self.connected: @@ -256,18 +265,18 @@ def gap_analysis(self, name_1, name_2): "RETURN p ", name1=name_1, name2=name_2, - database_="neo4j" + database_="neo4j", ) def format_segment(seg): - return { + return { "start": { "name": seg.start_node["name"], "sectionID": seg.start_node["section_id"], "section": seg.start_node["section"], "subsection": seg.start_node["subsection"], "description": seg.start_node["description"], - "id": seg.start_node["id"] + "id": seg.start_node["id"], }, "end": { "name": seg.end_node["name"], @@ -275,9 +284,9 @@ def format_segment(seg): "section": seg.end_node["section"], "subsection": seg.end_node["subsection"], "description": seg.end_node["description"], - "id": seg.end_node["id"] + "id": seg.end_node["id"], }, - "relationship": seg.type + "relationship": seg.type, } def format_record(rec): @@ -288,7 +297,7 @@ def format_record(rec): "section": rec.start_node["section"], "subsection": rec.start_node["subsection"], "description": rec.start_node["description"], - "id": rec.start_node["id"] + "id": rec.start_node["id"], }, "end": { "name": rec.end_node["name"], @@ -296,11 +305,13 @@ def format_record(rec): "section": rec.end_node["section"], "subsection": rec.end_node["subsection"], "description": rec.end_node["description"], - "id": rec.end_node["id"] + "id": 
rec.end_node["id"], }, - "path": [format_segment(seg) for seg in rec.relationships] + "path": [format_segment(seg) for seg in rec.relationships], } - return [format_record(rec['p']) for rec in records] + + return [format_record(rec["p"]) for rec in records] + class CRE_Graph: graph: nx.Graph = None @@ -339,7 +350,7 @@ def add_cre(cls, dbcre: CRE, graph: nx.DiGraph) -> nx.DiGraph: def add_dbnode(cls, dbnode: Node, graph: nx.DiGraph) -> nx.DiGraph: if dbnode: cls.neo_db.add_dbnode(dbnode) - # coma separated tags + # coma separated tags graph.add_node( "Node: " + str(dbnode.id), diff --git a/application/frontend/src/const.ts b/application/frontend/src/const.ts index 231f78447..cc2afdfc8 100644 --- a/application/frontend/src/const.ts +++ b/application/frontend/src/const.ts @@ -36,3 +36,4 @@ export const CRE = '/cre'; export const GRAPH = '/graph'; export const DEEPLINK = '/deeplink'; export const BROWSEROOT = '/root_cres'; +export const GAP_ANALYSIS = '/gap_analysis'; diff --git a/application/frontend/src/pages/GapAnalysis/GapAnalysis.tsx b/application/frontend/src/pages/GapAnalysis/GapAnalysis.tsx new file mode 100644 index 000000000..99aeff39c --- /dev/null +++ b/application/frontend/src/pages/GapAnalysis/GapAnalysis.tsx @@ -0,0 +1,133 @@ +import React, { useEffect, useState } from 'react'; +import { Dropdown, Label, Popup, Segment, Table } from 'semantic-ui-react'; + +import { useEnvironment } from '../../hooks'; + +const GetSegmentText = (segment, segmentID) => { + let textPart = segment.end; + let nextID = segment.end.id; + let arrow = '->'; + if (segmentID !== segment.start.id) { + textPart = segment.start; + nextID = segment.start.id; + arrow = '<-'; + } + const text = `${arrow} ${segment.relationship} ${arrow} ${textPart.name} ${textPart.sectionID} ${textPart.section} ${textPart.subsection} ${textPart.description}`; + return { text, nextID }; +}; + +export const GapAnalysis = () => { + const standardOptions = [ + { key: '', text: '', value: undefined }, + { 
key: 'OWASP Top 10 2021', text: 'OWASP Top 10 2021', value: 'OWASP Top 10 2021' }, + { key: 'NIST 800-53 v5', text: 'NIST 800-53 v5', value: 'NIST 800-53 v5' }, + { key: 'ISO 27001', text: 'ISO 27001', value: 'ISO 27001' }, + { key: 'Cloud Controls Matrix', text: 'Cloud Controls Matrix', value: 'Cloud Controls Matrix' }, + { key: 'ASVS', text: 'ASVS', value: 'ASVS' }, + { key: 'OWASP Proactive Controls', text: 'OWASP Proactive Controls', value: 'OWASP Proactive Controls' }, + { key: 'SAMM', text: 'SAMM', value: 'SAMM' }, + { key: 'CWE', text: 'CWE', value: 'CWE' }, + { key: 'OWASP Cheat Sheets', text: 'OWASP Cheat Sheets', value: 'OWASP Cheat Sheets' }, + { + key: 'OWASP Web Security Testing Guide (WSTG)', + text: 'OWASP Web Security Testing Guide (WSTG)', + value: 'OWASP Web Security Testing Guide (WSTG)', + }, + { key: 'NIST 800-63', text: 'NIST 800-63', value: 'NIST 800-63' }, + { key: 'Cheat_sheets', text: 'Cheat_sheets', value: 'Cheat_sheets' }, + { key: 'CAPEC', text: 'CAPEC', value: 'CAPEC' }, + { key: 'ZAP Rule', text: 'ZAP Rule', value: 'ZAP Rule' }, + { key: 'OWASP', text: 'OWASP', value: 'OWASP' }, + { + key: 'OWASP Secure Headers Project', + text: 'OWASP Secure Headers Project', + value: 'OWASP Secure Headers Project', + }, + { key: 'PCI DSS', text: 'PCI DSS', value: 'PCI DSS' }, + { key: 'OWASP Juice Shop', text: 'OWASP Juice Shop', value: 'OWASP Juice Shop' }, + ]; + const [BaseStandard, setBaseStandard] = useState(); + const [CompareStandard, setCompareStandard] = useState(); + const [gapAnalysis, setGapAnalysis] = useState(); + const { apiUrl } = useEnvironment(); + useEffect(() => { + const fetchData = async () => { + const result = await fetch( + `${apiUrl}/gap_analysis?standard=${BaseStandard}&standard=${CompareStandard}` + ); + const resultObj = await result.json(); + setGapAnalysis(resultObj); + }; + + if (!BaseStandard || !CompareStandard || BaseStandard === CompareStandard) return; + fetchData().catch(console.error); + }, [BaseStandard, 
CompareStandard, setGapAnalysis]); + + return ( +
+ setBaseStandard(value?.toString())} + /> + setCompareStandard(value?.toString())} + /> + {gapAnalysis && ( + + + + {BaseStandard} + {CompareStandard} + + + + + {Object.keys(gapAnalysis).map((key) => ( + + + + + + {gapAnalysis[key].paths.map((path) => { + let segmentID = gapAnalysis[key].start.id; + return ( + { + const { text, nextID } = GetSegmentText(segment, segmentID); + segmentID = nextID; + return text; + }) + .join('')} + trigger={ + + {path.end.name} {path.end.sectionID} {path.end.section} {path.end.subsection}{' '} + {path.end.description},{' '} + + } + /> + ); + })} +
({gapAnalysis[key].paths.length}) +
+
+ ))} +
+
+ )} +
+ ); +}; diff --git a/application/frontend/src/routes.tsx b/application/frontend/src/routes.tsx index 876462503..548c2d7a3 100644 --- a/application/frontend/src/routes.tsx +++ b/application/frontend/src/routes.tsx @@ -1,10 +1,22 @@ import { ReactNode } from 'react'; -import { BROWSEROOT, CRE, DEEPLINK, GRAPH, INDEX, SEARCH, SECTION, SECTION_ID, STANDARD } from './const'; +import { + BROWSEROOT, + CRE, + DEEPLINK, + GAP_ANALYSIS, + GRAPH, + INDEX, + SEARCH, + SECTION, + SECTION_ID, + STANDARD, +} from './const'; import { CommonRequirementEnumeration, Graph, Search, Standard } from './pages'; import { BrowseRootCres } from './pages/BrowseRootCres/browseRootCres'; import { Chatbot } from './pages/chatbot/chatbot'; import { Deeplink } from './pages/Deeplink/Deeplink'; +import { GapAnalysis } from './pages/GapAnalysis/GapAnalysis'; import { MembershipRequired } from './pages/MembershipRequired/MembershipRequired'; import { SearchName } from './pages/Search/SearchName'; import { StandardSection } from './pages/Standard/StandardSection'; @@ -23,6 +35,12 @@ export const ROUTES: IRoute[] = [ showFilter: false, showHeader: false, }, + { + path: GAP_ANALYSIS, + component: GapAnalysis, + showHeader: true, + showFilter: false, + }, { path: `/node${STANDARD}/:id${SECTION}/:section`, component: StandardSection, diff --git a/application/web/web_main.py b/application/web/web_main.py index 2675900a3..ef9f01bc0 100644 --- a/application/web/web_main.py +++ b/application/web/web_main.py @@ -211,12 +211,12 @@ def gap_analysis() -> Any: # TODO (spyros): add export result to spreadsheet paths = database.gap_analysis(standards) grouped_paths = {} for path in paths: - key = path['start']['id'] + key = path["start"]["id"] if key not in grouped_paths: - grouped_paths[key] = {"start": path['start'], "paths": []} - del path['start'] - grouped_paths[key]['paths'].append(path) - + grouped_paths[key] = {"start": path["start"], "paths": []} + del path["start"] + 
grouped_paths[key]["paths"].append(path) + return jsonify(grouped_paths) From c24f45f94da2bb011f5b50845b2ca78f9785f6ab Mon Sep 17 00:00:00 2001 From: john681611 Date: Mon, 21 Aug 2023 17:23:41 +0100 Subject: [PATCH 07/75] Mock score and WIP UI --- application/database/db.py | 12 +++ .../src/pages/GapAnalysis/GapAnalysis.tsx | 94 ++++++++++++++----- application/utils/gap_analysis.py | 5 + application/web/web_main.py | 2 + 4 files changed, 89 insertions(+), 24 deletions(-) create mode 100644 application/utils/gap_analysis.py diff --git a/application/database/db.py b/application/database/db.py index f52f66960..adbb1af4d 100644 --- a/application/database/db.py +++ b/application/database/db.py @@ -268,6 +268,18 @@ def gap_analysis(self, name_1, name_2): database_="neo4j", ) + # records_no_related, _, _ = self.driver.execute_query( + # "MATCH" + # "(BaseStandard:Node {name: $name1}), " + # "(CompareStandard:Node {name: $name2}), " + # "p = shortestPath((BaseStandard)-[*]-(CompareStandard)) " + # "WHERE length(p) > 1 AND ALL(n in NODES(p) WHERE n:CRE or n = BaseStandard or n = CompareStandard) AND ALL(r IN relationships(p) WHERE NOT r:RELATED) " + # "RETURN p ", + # name1=name_1, + # name2=name_2, + # database_="neo4j", + # ) + def format_segment(seg): return { "start": { diff --git a/application/frontend/src/pages/GapAnalysis/GapAnalysis.tsx b/application/frontend/src/pages/GapAnalysis/GapAnalysis.tsx index 99aeff39c..2b952ce82 100644 --- a/application/frontend/src/pages/GapAnalysis/GapAnalysis.tsx +++ b/application/frontend/src/pages/GapAnalysis/GapAnalysis.tsx @@ -1,5 +1,5 @@ import React, { useEffect, useState } from 'react'; -import { Dropdown, Label, Popup, Segment, Table } from 'semantic-ui-react'; +import { Accordion, Dropdown, Icon, Label, Popup, Segment, Table } from 'semantic-ui-react'; import { useEnvironment } from '../../hooks'; @@ -49,6 +49,7 @@ export const GapAnalysis = () => { const [BaseStandard, setBaseStandard] = useState(); const [CompareStandard, 
setCompareStandard] = useState(); const [gapAnalysis, setGapAnalysis] = useState(); + const [activeIndex, SetActiveIndex] = useState(); const { apiUrl } = useEnvironment(); useEffect(() => { const fetchData = async () => { @@ -63,6 +64,12 @@ export const GapAnalysis = () => { fetchData().catch(console.error); }, [BaseStandard, CompareStandard, setGapAnalysis]); + const handleAccordionClick = (e, titleProps) => { + const { index } = titleProps + const newIndex = activeIndex === index ? -1 : index + SetActiveIndex(newIndex) + } + return (
{ - {gapAnalysis[key].paths.map((path) => { - let segmentID = gapAnalysis[key].start.id; - return ( - { - const { text, nextID } = GetSegmentText(segment, segmentID); - segmentID = nextID; - return text; - }) - .join('')} - trigger={ - - {path.end.name} {path.end.sectionID} {path.end.section} {path.end.subsection}{' '} - {path.end.description},{' '} - - } - /> - ); - })} -
({gapAnalysis[key].paths.length}) + + + + {gapAnalysis[key].paths.sort((a, b) => a.score - b.score).slice(0, 3).map((path) => { + let segmentID = gapAnalysis[key].start.id; + return ( + <> + { + const { text, nextID } = GetSegmentText(segment, segmentID); + segmentID = nextID; + return text; + }) + .join('')} + trigger={ + + {path.end.name} {path.end.sectionID} {path.end.section} {path.end.subsection}{' '} + {path.end.description}{' '}({path.score}) + + } + /> +
+ + ); + })} + (Total Links: {gapAnalysis[key].paths.length}) +
+ + {gapAnalysis[key].paths.sort((a, b) => a.score - b.score).slice(2, gapAnalysis[key].paths.length).map((path) => { + let segmentID = gapAnalysis[key].start.id; + return ( + <> + { + const { text, nextID } = GetSegmentText(segment, segmentID); + segmentID = nextID; + return text; + }) + .join('')} + trigger={ + + {path.end.name} {path.end.sectionID} {path.end.section} {path.end.subsection}{' '} + {path.end.description}{' '}({path.score}) + + } + /> +
+ + ); + })} +
+
))} diff --git a/application/utils/gap_analysis.py b/application/utils/gap_analysis.py new file mode 100644 index 000000000..fe3be0e39 --- /dev/null +++ b/application/utils/gap_analysis.py @@ -0,0 +1,5 @@ +import random + + +def get_path_score(path): + return random.randint(10, 100) \ No newline at end of file diff --git a/application/web/web_main.py b/application/web/web_main.py index ef9f01bc0..57584c111 100644 --- a/application/web/web_main.py +++ b/application/web/web_main.py @@ -15,6 +15,7 @@ from application.defs import osib_defs as odefs from application.utils import spreadsheet as sheet_utils from application.utils import mdutils, redirectors +from application.utils.gap_analysis import get_path_score from application.prompt_client import prompt_client as prompt_client from enum import Enum from flask import ( @@ -214,6 +215,7 @@ def gap_analysis() -> Any: # TODO (spyros): add export result to spreadsheet key = path["start"]["id"] if key not in grouped_paths: grouped_paths[key] = {"start": path["start"], "paths": []} + path['score'] = get_path_score(path) del path["start"] grouped_paths[key]["paths"].append(path) From bb22a2ec949fbce9c314cd3db99feb384cf549dd Mon Sep 17 00:00:00 2001 From: john681611 Date: Thu, 24 Aug 2023 09:57:58 +0100 Subject: [PATCH 08/75] implement scoring and basic tests --- .../src/pages/GapAnalysis/GapAnalysis.tsx | 122 +++++++++--------- application/tests/gap_analysis_test.py | 103 +++++++++++++++ application/utils/gap_analysis.py | 27 +++- application/web/web_main.py | 2 +- 4 files changed, 190 insertions(+), 64 deletions(-) create mode 100644 application/tests/gap_analysis_test.py diff --git a/application/frontend/src/pages/GapAnalysis/GapAnalysis.tsx b/application/frontend/src/pages/GapAnalysis/GapAnalysis.tsx index 2b952ce82..be6041207 100644 --- a/application/frontend/src/pages/GapAnalysis/GapAnalysis.tsx +++ b/application/frontend/src/pages/GapAnalysis/GapAnalysis.tsx @@ -65,10 +65,10 @@ export const GapAnalysis = () => { }, 
[BaseStandard, CompareStandard, setGapAnalysis]); const handleAccordionClick = (e, titleProps) => { - const { index } = titleProps - const newIndex = activeIndex === index ? -1 : index - SetActiveIndex(newIndex) - } + const { index } = titleProps; + const newIndex = activeIndex === index ? -1 : index; + SetActiveIndex(newIndex); + }; return (
@@ -107,65 +107,67 @@ export const GapAnalysis = () => { - - - {gapAnalysis[key].paths.sort((a, b) => a.score - b.score).slice(0, 3).map((path) => { - let segmentID = gapAnalysis[key].start.id; - return ( - <> - { - const { text, nextID } = GetSegmentText(segment, segmentID); - segmentID = nextID; - return text; - }) - .join('')} - trigger={ - - {path.end.name} {path.end.sectionID} {path.end.section} {path.end.subsection}{' '} - {path.end.description}{' '}({path.score}) - - } - /> -
- - ); - })} + + + {gapAnalysis[key].paths + .sort((a, b) => a.score - b.score) + .slice(0, 3) + .map((path) => { + let segmentID = gapAnalysis[key].start.id; + return ( + <> + { + const { text, nextID } = GetSegmentText(segment, segmentID); + segmentID = nextID; + return text; + }) + .join('')} + trigger={ + + {path.end.name} {path.end.sectionID} {path.end.section}{' '} + {path.end.subsection} {path.end.description} ({path.score}) + + } + /> +
+ + ); + })} (Total Links: {gapAnalysis[key].paths.length})
- {gapAnalysis[key].paths.sort((a, b) => a.score - b.score).slice(2, gapAnalysis[key].paths.length).map((path) => { - let segmentID = gapAnalysis[key].start.id; - return ( - <> - { - const { text, nextID } = GetSegmentText(segment, segmentID); - segmentID = nextID; - return text; - }) - .join('')} - trigger={ - - {path.end.name} {path.end.sectionID} {path.end.section} {path.end.subsection}{' '} - {path.end.description}{' '}({path.score}) - - } - /> -
- - ); - })} + {gapAnalysis[key].paths + .sort((a, b) => a.score - b.score) + .slice(2, gapAnalysis[key].paths.length) + .map((path) => { + let segmentID = gapAnalysis[key].start.id; + return ( + <> + { + const { text, nextID } = GetSegmentText(segment, segmentID); + segmentID = nextID; + return text; + }) + .join('')} + trigger={ + + {path.end.name} {path.end.sectionID} {path.end.section}{' '} + {path.end.subsection} {path.end.description} ({path.score}) + + } + /> +
+ + ); + })}
diff --git a/application/tests/gap_analysis_test.py b/application/tests/gap_analysis_test.py new file mode 100644 index 000000000..a1414c6d9 --- /dev/null +++ b/application/tests/gap_analysis_test.py @@ -0,0 +1,103 @@ +import unittest + +from application.utils.gap_analysis import ( + get_path_score, + get_relation_direction, + get_next_id, + PENALTIES +) + + +class TestGapAnalysis(unittest.TestCase): + def tearDown(self) -> None: + return None + + def setUp(self) -> None: + return None + + def test_get_relation_direction_UP(self): + step = {"start": {"id": "123"}, "end": {"id": "234"}} + self.assertEqual(get_relation_direction(step, "123"), "UP") + + def test_get_relation_direction_DOWN(self): + step = {"start": {"id": "123"}, "end": {"id": "234"}} + self.assertEqual(get_relation_direction(step, "234"), "DOWN") + + def test_get_next_id_start(self): + step = {"start": {"id": "123"}, "end": {"id": "234"}} + self.assertEqual(get_next_id(step, "234"), "123") + + def test_get_next_id_end(self): + step = {"start": {"id": "123"}, "end": {"id": "234"}} + self.assertEqual(get_next_id(step, "123"), "234") + + def test_get_path_score_direct_siblings_zero(self): + path = { + "start": { + "id": "029f7cd7-ef2f-4f25-b0d2-3227cde4b34b", + }, + "end": { + "id": "7d030730-14cc-4c43-8927-f2d0f5fbcf5d", + }, + "path": [ + { + "end": { + "id": "029f7cd7-ef2f-4f25-b0d2-3227cde4b34b", + }, + "relationship": "LINKED_TO", + "start": { + "id": "07bc9f6f-5387-4dc6-b277-0022ed76049f", + }, + }, + { + "end": { + "id": "7d030730-14cc-4c43-8927-f2d0f5fbcf5d", + }, + "relationship": "LINKED_TO", + "start": { + "id": "e2ac59b2-c1d8-4525-a6b3-155d480aecc9", + }, + }, + ], + } + self.assertEqual(get_path_score(path), 0) + + def test_get_path_score_one_up_zero(self): + path = { + "start": { + "id": "029f7cd7-ef2f-4f25-b0d2-3227cde4b34b", + }, + "end": { + "id": "7d030730-14cc-4c43-8927-f2d0f5fbcf5d", + }, + "path": [ + { + "end": { + "id": "029f7cd7-ef2f-4f25-b0d2-3227cde4b34b", + }, + 
"relationship": "LINKED_TO", + "start": { + "id": "07bc9f6f-5387-4dc6-b277-0022ed76049f", + }, + }, + { + "end": { + "id": "123", + }, + "relationship": "CONTAINS", + "start": { + "id": "07bc9f6f-5387-4dc6-b277-0022ed76049f", + }, + }, + { + "end": { + "id": "7d030730-14cc-4c43-8927-f2d0f5fbcf5d", + }, + "relationship": "LINKED_TO", + "start": { + "id": "123", + }, + }, + ], + } + self.assertEqual(get_path_score(path), PENALTIES['CONTAINS_UP']) diff --git a/application/utils/gap_analysis.py b/application/utils/gap_analysis.py index fe3be0e39..308714592 100644 --- a/application/utils/gap_analysis.py +++ b/application/utils/gap_analysis.py @@ -1,5 +1,26 @@ -import random +PENALTIES = {"RELATED": 20, "CONTAINS_UP": 2, "CONTAINS_DOWN": 1, "LINKED_TO": 0} -def get_path_score(path): - return random.randint(10, 100) \ No newline at end of file +def get_path_score(path, start_id): + score = 0 + previous_id = start_id + for step in path["path"]: + penalty_type = step["relationship"] + + if step["relationship"] == "CONTAINS": + penalty_type = f"CONTAINS_{get_relation_direction(step, previous_id)}" + score += PENALTIES[penalty_type] + previous_id = get_next_id(step, previous_id) + return score + + +def get_relation_direction(step, previous_id): + if step["start"]["id"] == previous_id: + return "UP" + return "DOWN" + + +def get_next_id(step, previous_id): + if step["start"]["id"] == previous_id: + return step["end"]["id"] + return step["start"]["id"] diff --git a/application/web/web_main.py b/application/web/web_main.py index 57584c111..338f73011 100644 --- a/application/web/web_main.py +++ b/application/web/web_main.py @@ -215,7 +215,7 @@ def gap_analysis() -> Any: # TODO (spyros): add export result to spreadsheet key = path["start"]["id"] if key not in grouped_paths: grouped_paths[key] = {"start": path["start"], "paths": []} - path['score'] = get_path_score(path) + path["score"] = get_path_score(path) del path["start"] grouped_paths[key]["paths"].append(path) From 
4e81dd810cd10882dc9d2e54b94ed8c1a507fca1 Mon Sep 17 00:00:00 2001 From: john681611 Date: Thu, 24 Aug 2023 10:13:09 +0100 Subject: [PATCH 09/75] scoring passing tests --- application/tests/gap_analysis_test.py | 151 ++++++++++++++++++++++++- application/utils/gap_analysis.py | 4 +- 2 files changed, 149 insertions(+), 6 deletions(-) diff --git a/application/tests/gap_analysis_test.py b/application/tests/gap_analysis_test.py index a1414c6d9..396da8ee4 100644 --- a/application/tests/gap_analysis_test.py +++ b/application/tests/gap_analysis_test.py @@ -4,7 +4,7 @@ get_path_score, get_relation_direction, get_next_id, - PENALTIES + PENALTIES, ) @@ -31,7 +31,7 @@ def test_get_next_id_end(self): step = {"start": {"id": "123"}, "end": {"id": "234"}} self.assertEqual(get_next_id(step, "123"), "234") - def test_get_path_score_direct_siblings_zero(self): + def test_get_path_score_direct_siblings_returns_zero(self): path = { "start": { "id": "029f7cd7-ef2f-4f25-b0d2-3227cde4b34b", @@ -62,7 +62,7 @@ def test_get_path_score_direct_siblings_zero(self): } self.assertEqual(get_path_score(path), 0) - def test_get_path_score_one_up_zero(self): + def test_get_path_score_one_up_returns_one_up_penaltiy(self): path = { "start": { "id": "029f7cd7-ef2f-4f25-b0d2-3227cde4b34b", @@ -100,4 +100,147 @@ def test_get_path_score_one_up_zero(self): }, ], } - self.assertEqual(get_path_score(path), PENALTIES['CONTAINS_UP']) + self.assertEqual(get_path_score(path), PENALTIES["CONTAINS_UP"]) + + def test_get_path_score_one_down_one_returns_one_down_penaltiy(self): + path = { + "start": { + "id": "029f7cd7-ef2f-4f25-b0d2-3227cde4b34b", + }, + "end": { + "id": "7d030730-14cc-4c43-8927-f2d0f5fbcf5d", + }, + "path": [ + { + "end": { + "id": "029f7cd7-ef2f-4f25-b0d2-3227cde4b34b", + }, + "relationship": "LINKED_TO", + "start": { + "id": "07bc9f6f-5387-4dc6-b277-0022ed76049f", + }, + }, + { + "end": { + "id": "07bc9f6f-5387-4dc6-b277-0022ed76049f", + }, + "relationship": "CONTAINS", + "start": { + "id": 
"123", + }, + }, + { + "end": { + "id": "7d030730-14cc-4c43-8927-f2d0f5fbcf5d", + }, + "relationship": "LINKED_TO", + "start": { + "id": "123", + }, + }, + ], + } + self.assertEqual(get_path_score(path), PENALTIES["CONTAINS_DOWN"]) + + def test_get_path_score_related_returns_related_penalty(self): + path = { + "start": { + "id": "029f7cd7-ef2f-4f25-b0d2-3227cde4b34b", + }, + "end": { + "id": "7d030730-14cc-4c43-8927-f2d0f5fbcf5d", + }, + "path": [ + { + "end": { + "id": "029f7cd7-ef2f-4f25-b0d2-3227cde4b34b", + }, + "relationship": "LINKED_TO", + "start": { + "id": "07bc9f6f-5387-4dc6-b277-0022ed76049f", + }, + }, + { + "end": { + "id": "07bc9f6f-5387-4dc6-b277-0022ed76049f", + }, + "relationship": "RELATED", + "start": { + "id": "123", + }, + }, + { + "end": { + "id": "7d030730-14cc-4c43-8927-f2d0f5fbcf5d", + }, + "relationship": "LINKED_TO", + "start": { + "id": "123", + }, + }, + ], + } + self.assertEqual(get_path_score(path), PENALTIES["RELATED"]) + + def test_get_path_score_one_of_each_returns_penalty(self): + path = { + "start": { + "id": "029f7cd7-ef2f-4f25-b0d2-3227cde4b34b", + }, + "end": { + "id": "7d030730-14cc-4c43-8927-f2d0f5fbcf5d", + }, + "path": [ + { + "end": { + "id": "029f7cd7-ef2f-4f25-b0d2-3227cde4b34b", + }, + "relationship": "LINKED_TO", + "start": { + "id": "07bc9f6f-5387-4dc6-b277-0022ed76049f", + }, + }, + { + "end": { + "id": "07bc9f6f-5387-4dc6-b277-0022ed76049f", + }, + "relationship": "CONTAINS", + "start": { + "id": "123", + }, + }, + { + "end": { + "id": "456", + }, + "relationship": "RELATED", + "start": { + "id": "123", + }, + }, + { + "end": { + "id": "7d030730-14cc-4c43-8927-f2d0f5fbcf5d", + }, + "relationship": "CONTAINS", + "start": { + "id": "456", + }, + }, + { + "end": { + "id": "7d030730-14cc-4c43-8927-f2d0f5fbcf5d", + }, + "relationship": "LINKED_TO", + "start": { + "id": "456", + }, + }, + ], + } + self.assertEqual( + get_path_score(path), + PENALTIES["RELATED"] + + PENALTIES["CONTAINS_UP"] + + PENALTIES["CONTAINS_DOWN"], 
+ ) diff --git a/application/utils/gap_analysis.py b/application/utils/gap_analysis.py index 308714592..47f97e830 100644 --- a/application/utils/gap_analysis.py +++ b/application/utils/gap_analysis.py @@ -1,9 +1,9 @@ PENALTIES = {"RELATED": 20, "CONTAINS_UP": 2, "CONTAINS_DOWN": 1, "LINKED_TO": 0} -def get_path_score(path, start_id): +def get_path_score(path): score = 0 - previous_id = start_id + previous_id = path["start"]["id"] for step in path["path"]: penalty_type = step["relationship"] From 324526301095c0fb567ece00e03aed5d4ea019bc Mon Sep 17 00:00:00 2001 From: john681611 Date: Thu, 24 Aug 2023 11:21:05 +0100 Subject: [PATCH 10/75] Update the UI --- .../src/pages/GapAnalysis/GapAnalysis.tsx | 158 +++++++++++------- 1 file changed, 95 insertions(+), 63 deletions(-) diff --git a/application/frontend/src/pages/GapAnalysis/GapAnalysis.tsx b/application/frontend/src/pages/GapAnalysis/GapAnalysis.tsx index be6041207..1c5383f37 100644 --- a/application/frontend/src/pages/GapAnalysis/GapAnalysis.tsx +++ b/application/frontend/src/pages/GapAnalysis/GapAnalysis.tsx @@ -1,5 +1,7 @@ import React, { useEffect, useState } from 'react'; -import { Accordion, Dropdown, Icon, Label, Popup, Segment, Table } from 'semantic-ui-react'; +import { Accordion, Button, Dropdown, Grid, Popup, Table } from 'semantic-ui-react'; +import { useLocation } from "react-router-dom"; +import { LoadingAndErrorIndicator } from '../../components/LoadingAndErrorIndicator'; import { useEnvironment } from '../../hooks'; @@ -12,12 +14,18 @@ const GetSegmentText = (segment, segmentID) => { nextID = segment.start.id; arrow = '<-'; } - const text = `${arrow} ${segment.relationship} ${arrow} ${textPart.name} ${textPart.sectionID} ${textPart.section} ${textPart.subsection} ${textPart.description}`; + const text = `${arrow} ${segment.relationship} ${arrow} ${textPart.name} ${textPart.sectionID ?? ""} ${textPart.section ?? ""} ${textPart.subsection ?? ''} ${textPart.description ?? 
''}`; return { text, nextID }; }; +function useQuery() { + const { search } = useLocation(); + + return React.useMemo(() => new URLSearchParams(search), [search]); +} + export const GapAnalysis = () => { - const standardOptions = [ + const standardOptions = [ // TODO: Automate this list { key: '', text: '', value: undefined }, { key: 'OWASP Top 10 2021', text: 'OWASP Top 10 2021', value: 'OWASP Top 10 2021' }, { key: 'NIST 800-53 v5', text: 'NIST 800-53 v5', value: 'NIST 800-53 v5' }, @@ -46,23 +54,34 @@ export const GapAnalysis = () => { { key: 'PCI DSS', text: 'PCI DSS', value: 'PCI DSS' }, { key: 'OWASP Juice Shop', text: 'OWASP Juice Shop', value: 'OWASP Juice Shop' }, ]; - const [BaseStandard, setBaseStandard] = useState(); - const [CompareStandard, setCompareStandard] = useState(); + const searchParams = useQuery(); + const [BaseStandard, setBaseStandard] = useState(searchParams.get('base') ?? ""); + const [CompareStandard, setCompareStandard] = useState(searchParams.get('compare') ?? 
""); const [gapAnalysis, setGapAnalysis] = useState(); const [activeIndex, SetActiveIndex] = useState(); + const [loading, setLoading] = useState(false); + const [error, setError] = useState(null); const { apiUrl } = useEnvironment(); + + const GetStrength = (score) => { + if(score < 5) return 'Strong' + if(score > 20) return 'Weak' + return 'Average' + } useEffect(() => { const fetchData = async () => { const result = await fetch( `${apiUrl}/gap_analysis?standard=${BaseStandard}&standard=${CompareStandard}` ); const resultObj = await result.json(); + setLoading(false); setGapAnalysis(resultObj); }; if (!BaseStandard || !CompareStandard || BaseStandard === CompareStandard) return; - fetchData().catch(console.error); - }, [BaseStandard, CompareStandard, setGapAnalysis]); + setLoading(true); + fetchData().catch(e => setError(e)); + }, [BaseStandard, CompareStandard, setGapAnalysis, setLoading, setError]); const handleAccordionClick = (e, titleProps) => { const { index } = titleProps; @@ -72,22 +91,33 @@ export const GapAnalysis = () => { return (
- setBaseStandard(value?.toString())} - /> - setCompareStandard(value?.toString())} - /> + + + + setBaseStandard(value?.toString())} + value={BaseStandard} + /> + + + setCompareStandard(value?.toString())} + value={CompareStandard} + /> + + + + {gapAnalysis && ( - +
{BaseStandard} @@ -97,58 +127,60 @@ export const GapAnalysis = () => { {Object.keys(gapAnalysis).map((key) => ( - - - + + +

+ {gapAnalysis[key].start.name} {gapAnalysis[key].start.section} {gapAnalysis[key].start.subsection}
+ {gapAnalysis[key].start.sectionID} + {gapAnalysis[key].start.description} +

- + + {gapAnalysis[key].paths + .sort((a, b) => a.score - b.score) + .slice(0, 3) + .map((path) => { + let segmentID = gapAnalysis[key].start.id; + return ( + + { + const { text, nextID } = GetSegmentText(segment, segmentID); + segmentID = nextID; + return text; + }) + .join('')} + trigger={ + + {path.end.name} {path.end.sectionID} {path.end.section}{' '} + {path.end.subsection} {path.end.description} ({GetStrength(path.score)}:{path.score}) + + } + /> +
+
+ ); + })} + - - {gapAnalysis[key].paths - .sort((a, b) => a.score - b.score) - .slice(0, 3) - .map((path) => { - let segmentID = gapAnalysis[key].start.id; - return ( - <> - { - const { text, nextID } = GetSegmentText(segment, segmentID); - segmentID = nextID; - return text; - }) - .join('')} - trigger={ - - {path.end.name} {path.end.sectionID} {path.end.section}{' '} - {path.end.subsection} {path.end.description} ({path.score}) - - } - /> -
- - ); - })} - (Total Links: {gapAnalysis[key].paths.length}) +
+ Weaker Links:
{gapAnalysis[key].paths .sort((a, b) => a.score - b.score) .slice(2, gapAnalysis[key].paths.length) .map((path) => { let segmentID = gapAnalysis[key].start.id; return ( - <> + { @@ -160,12 +192,12 @@ export const GapAnalysis = () => { trigger={ {path.end.name} {path.end.sectionID} {path.end.section}{' '} - {path.end.subsection} {path.end.description} ({path.score}) + {path.end.subsection} {path.end.description} {GetStrength(path.score)}:{path.score}) } />
- +
); })}
From 814996adddb89f4361264a00dc6a3a19fc5833fb Mon Sep 17 00:00:00 2001 From: john681611 Date: Mon, 4 Sep 2023 13:25:55 +0100 Subject: [PATCH 11/75] Updated: Dropdowns are now dynamic --- application/database/db.py | 49 +++++------------ .../src/pages/GapAnalysis/GapAnalysis.tsx | 54 ++++++++----------- application/web/web_main.py | 17 +++++- 3 files changed, 52 insertions(+), 68 deletions(-) diff --git a/application/database/db.py b/application/database/db.py index adbb1af4d..33f338b76 100644 --- a/application/database/db.py +++ b/application/database/db.py @@ -268,18 +268,6 @@ def gap_analysis(self, name_1, name_2): database_="neo4j", ) - # records_no_related, _, _ = self.driver.execute_query( - # "MATCH" - # "(BaseStandard:Node {name: $name1}), " - # "(CompareStandard:Node {name: $name2}), " - # "p = shortestPath((BaseStandard)-[*]-(CompareStandard)) " - # "WHERE length(p) > 1 AND ALL(n in NODES(p) WHERE n:CRE or n = BaseStandard or n = CompareStandard) AND ALL(r IN relationships(p) WHERE NOT r:RELATED) " - # "RETURN p ", - # name1=name_1, - # name2=name_2, - # database_="neo4j", - # ) - def format_segment(seg): return { "start": { @@ -323,7 +311,17 @@ def format_record(rec): } return [format_record(rec["p"]) for rec in records] - + + @classmethod + def standards(self): + if not self.connected: + return + records, _, _ = self.driver.execute_query( + 'MATCH (n:Node {ntype: "Standard"}) ' + "RETURN collect(distinct n.name)", + database_="neo4j", + ) + return records[0][0] class CRE_Graph: graph: nx.Graph = None @@ -1239,30 +1237,11 @@ def find_path_between_nodes( return res def gap_analysis(self, node_names: List[str]): - """Since the CRE structure is a tree-like graph with - leaves being nodes we can find the paths between nodes - find_path_between_nodes() is a graph-path-finding method - """ - # processed_nodes = [] - # dbnodes: List[Node] = [] - # for name in node_names: - # dbnodes.extend(self.session.query(Node).filter(Node.name == name).all()) - - # for node 
in dbnodes: - # working_node = nodeFromDB(node) - # for other_node in dbnodes: - # if node.id == other_node.id: - # continue - # if self.find_path_between_nodes(node.id, other_node.id): - # working_node.add_link( - # cre_defs.Link( - # ltype=cre_defs.LinkTypes.LinkedTo, - # document=nodeFromDB(other_node), - # ) - # ) - # processed_nodes.append(working_node) return self.neo_db.gap_analysis(node_names[0], node_names[1]) + def standards(self): + return self.neo_db.standards() + def text_search(self, text: str) -> List[Optional[cre_defs.Document]]: """Given a piece of text, tries to find the best match for the text in the database. diff --git a/application/frontend/src/pages/GapAnalysis/GapAnalysis.tsx b/application/frontend/src/pages/GapAnalysis/GapAnalysis.tsx index 1c5383f37..32356baaa 100644 --- a/application/frontend/src/pages/GapAnalysis/GapAnalysis.tsx +++ b/application/frontend/src/pages/GapAnalysis/GapAnalysis.tsx @@ -1,9 +1,10 @@ import React, { useEffect, useState } from 'react'; -import { Accordion, Button, Dropdown, Grid, Popup, Table } from 'semantic-ui-react'; +import { Accordion, Button, Dropdown, DropdownItemProps, Grid, Popup, Table } from 'semantic-ui-react'; import { useLocation } from "react-router-dom"; import { LoadingAndErrorIndicator } from '../../components/LoadingAndErrorIndicator'; import { useEnvironment } from '../../hooks'; +import axios from 'axios'; const GetSegmentText = (segment, segmentID) => { let textPart = segment.end; @@ -25,36 +26,12 @@ function useQuery() { } export const GapAnalysis = () => { - const standardOptions = [ // TODO: Automate this list + const standardOptionsDefault = [ { key: '', text: '', value: undefined }, - { key: 'OWASP Top 10 2021', text: 'OWASP Top 10 2021', value: 'OWASP Top 10 2021' }, - { key: 'NIST 800-53 v5', text: 'NIST 800-53 v5', value: 'NIST 800-53 v5' }, - { key: 'ISO 27001', text: 'ISO 27001', value: 'ISO 27001' }, - { key: 'Cloud Controls Matrix', text: 'Cloud Controls Matrix', value: 'Cloud 
Controls Matrix' }, - { key: 'ASVS', text: 'ASVS', value: 'ASVS' }, - { key: 'OWASP Proactive Controls', text: 'OWASP Proactive Controls', value: 'OWASP Proactive Controls' }, - { key: 'SAMM', text: 'SAMM', value: 'SAMM' }, - { key: 'CWE', text: 'CWE', value: 'CWE' }, - { key: 'OWASP Cheat Sheets', text: 'OWASP Cheat Sheets', value: 'OWASP Cheat Sheets' }, - { - key: 'OWASP Web Security Testing Guide (WSTG)', - text: 'OWASP Web Security Testing Guide (WSTG)', - value: 'OWASP Web Security Testing Guide (WSTG)', - }, - { key: 'NIST 800-63', text: 'NIST 800-63', value: 'NIST 800-63' }, - { key: 'Cheat_sheets', text: 'Cheat_sheets', value: 'Cheat_sheets' }, - { key: 'CAPEC', text: 'CAPEC', value: 'CAPEC' }, - { key: 'ZAP Rule', text: 'ZAP Rule', value: 'ZAP Rule' }, - { key: 'OWASP', text: 'OWASP', value: 'OWASP' }, - { - key: 'OWASP Secure Headers Project', - text: 'OWASP Secure Headers Project', - value: 'OWASP Secure Headers Project', - }, - { key: 'PCI DSS', text: 'PCI DSS', value: 'PCI DSS' }, - { key: 'OWASP Juice Shop', text: 'OWASP Juice Shop', value: 'OWASP Juice Shop' }, + ]; const searchParams = useQuery(); + const [standardOptions, setStandardOptions] = useState(standardOptionsDefault); const [BaseStandard, setBaseStandard] = useState(searchParams.get('base') ?? ""); const [CompareStandard, setCompareStandard] = useState(searchParams.get('compare') ?? ""); const [gapAnalysis, setGapAnalysis] = useState(); @@ -68,19 +45,32 @@ export const GapAnalysis = () => { if(score > 20) return 'Weak' return 'Average' } + + useEffect(() => { + const fetchData = async () => { + const result = await axios.get( + `${apiUrl}/standards` + ); + setLoading(false); + setStandardOptions(standardOptionsDefault.concat(result.data.map(x => ({ key: x, text: x, value: x })))); + }; + + setLoading(true); + fetchData().catch(e => {setLoading(false); setError(e.response.data.message ?? 
e.message)}); + }, [setStandardOptions, setLoading, setError]); + useEffect(() => { const fetchData = async () => { - const result = await fetch( + const result = await axios.get( `${apiUrl}/gap_analysis?standard=${BaseStandard}&standard=${CompareStandard}` ); - const resultObj = await result.json(); setLoading(false); - setGapAnalysis(resultObj); + setGapAnalysis(result.data); }; if (!BaseStandard || !CompareStandard || BaseStandard === CompareStandard) return; setLoading(true); - fetchData().catch(e => setError(e)); + fetchData().catch(e => {setLoading(false); setError(e.response.data.message ?? e.message)}); }, [BaseStandard, CompareStandard, setGapAnalysis, setLoading, setError]); const handleAccordionClick = (e, titleProps) => { diff --git a/application/web/web_main.py b/application/web/web_main.py index 338f73011..94f7abd2e 100644 --- a/application/web/web_main.py +++ b/application/web/web_main.py @@ -66,6 +66,10 @@ def extend_cre_with_tag_links( return cre +def neo4j_not_running_rejection(): + logger.info("Neo4j is disabled") + return jsonify({"message": "Backend services connected to this feature are not running at the moment."}), 500 + @app.route("/rest/v1/id/", methods=["GET"]) @app.route("/rest/v1/name/", methods=["GET"]) @cache.cached(timeout=50) @@ -206,10 +210,12 @@ def find_document_by_tag() -> Any: @app.route("/rest/v1/gap_analysis", methods=["GET"]) @cache.cached(timeout=50) -def gap_analysis() -> Any: # TODO (spyros): add export result to spreadsheet +def gap_analysis() -> Any: database = db.Node_collection() standards = request.args.getlist("standard") paths = database.gap_analysis(standards) + if paths is None: + return neo4j_not_running_rejection() grouped_paths = {} for path in paths: key = path["start"]["id"] @@ -221,6 +227,15 @@ def gap_analysis() -> Any: # TODO (spyros): add export result to spreadsheet return jsonify(grouped_paths) +@app.route("/rest/v1/standards", methods=["GET"]) +@cache.cached(timeout=50) +def standards() -> Any: + 
database = db.Node_collection() + standards = database.standards() + if standards is None: + neo4j_not_running_rejection() + return standards + @app.route("/rest/v1/text_search", methods=["GET"]) # @cache.cached(timeout=50) From 6af9f69f19eaf1ea70b6a22faab51cd160d7d1f2 Mon Sep 17 00:00:00 2001 From: john681611 Date: Mon, 4 Sep 2023 15:20:11 +0100 Subject: [PATCH 12/75] Localise neo4j --- .gitignore | 5 ++++- Makefile | 2 +- application/database/db.py | 2 +- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/.gitignore b/.gitignore index cbf1dd8c1..d6db6dd2b 100644 --- a/.gitignore +++ b/.gitignore @@ -30,4 +30,7 @@ yarn-error.log coverage/ ### Dev db -standards_cache.sqlite \ No newline at end of file +standards_cache.sqlite + +### Neo4j +neo4j/ \ No newline at end of file diff --git a/Makefile b/Makefile index ef43d81e7..5da2b61ea 100644 --- a/Makefile +++ b/Makefile @@ -46,7 +46,7 @@ docker-run: docker run -it -p 5000:5000 opencre:$(shell git rev-parse HEAD) docker-neo4j: - docker run --env NEO4J_PLUGINS='["apoc"]' --volume=/Users/johnharvey/neo4j/data:/data --volume=/data --volume=/logs --workdir=/var/lib/neo4j -p 7474:7474 -p 7687:7687 -d neo4j + docker run --env NEO4J_PLUGINS='["apoc"]' --volume=./neo4j/data:/data --volume=/data --volume=/logs --workdir=/var/lib/neo4j -p 7474:7474 -p 7687:7687 -d neo4j lint: [ -d "./venv" ] && . ./venv/bin/activate && black . 
&& yarn lint diff --git a/application/database/db.py b/application/database/db.py index 33f338b76..030eaecb2 100644 --- a/application/database/db.py +++ b/application/database/db.py @@ -333,7 +333,7 @@ def instance(cls, session, neo_db: NEO_DB): if cls.__instance is None: cls.__instance = cls.__new__(cls) cls.neo_db = neo_db - # cls.graph = cls.load_cre_graph(session) + cls.graph = cls.load_cre_graph(session) return cls.__instance def __init__(sel): From f4b4e9ab016553709c0a0a8e0b1134fcc5473b69 Mon Sep 17 00:00:00 2001 From: john681611 Date: Mon, 4 Sep 2023 15:20:53 +0100 Subject: [PATCH 13/75] Added Navigation method --- application/frontend/src/scaffolding/Header/Header.tsx | 4 ++++ application/frontend/src/scaffolding/Header/header.scss | 1 + 2 files changed, 5 insertions(+) diff --git a/application/frontend/src/scaffolding/Header/Header.tsx b/application/frontend/src/scaffolding/Header/Header.tsx index aa872fb43..c2652d80e 100644 --- a/application/frontend/src/scaffolding/Header/Header.tsx +++ b/application/frontend/src/scaffolding/Header/Header.tsx @@ -13,6 +13,10 @@ const getLinks = (): { to: string; name: string }[] => [ to: `/`, name: 'Open CRE', }, + { + to: `/gap_analysis`, + name: 'Gap Analysis', + }, ]; export const Header = () => { diff --git a/application/frontend/src/scaffolding/Header/header.scss b/application/frontend/src/scaffolding/Header/header.scss index e01e85568..faec51d53 100644 --- a/application/frontend/src/scaffolding/Header/header.scss +++ b/application/frontend/src/scaffolding/Header/header.scss @@ -20,6 +20,7 @@ padding-top: 10px; padding-bottom: 10px; text-align: center; + margin: 0 2px; .item { color: white !important; From 4b8a86bb0d6c54ea338afced10274040f722b4e9 Mon Sep 17 00:00:00 2001 From: john681611 Date: Mon, 4 Sep 2023 15:21:07 +0100 Subject: [PATCH 14/75] Add share and nav links --- .../src/pages/GapAnalysis/GapAnalysis.tsx | 27 +++++++++++++++---- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git 
a/application/frontend/src/pages/GapAnalysis/GapAnalysis.tsx b/application/frontend/src/pages/GapAnalysis/GapAnalysis.tsx index 32356baaa..89c563235 100644 --- a/application/frontend/src/pages/GapAnalysis/GapAnalysis.tsx +++ b/application/frontend/src/pages/GapAnalysis/GapAnalysis.tsx @@ -1,5 +1,5 @@ import React, { useEffect, useState } from 'react'; -import { Accordion, Button, Dropdown, DropdownItemProps, Grid, Popup, Table } from 'semantic-ui-react'; +import { Accordion, Button, Dropdown, DropdownItemProps, Grid, Icon, Popup, Table } from 'semantic-ui-react'; import { useLocation } from "react-router-dom"; import { LoadingAndErrorIndicator } from '../../components/LoadingAndErrorIndicator'; @@ -104,10 +104,17 @@ export const GapAnalysis = () => { /> + {gapAnalysis && ( + + + + )} {gapAnalysis && ( -
+
{BaseStandard} @@ -120,7 +127,11 @@ export const GapAnalysis = () => {

- {gapAnalysis[key].start.name} {gapAnalysis[key].start.section} {gapAnalysis[key].start.subsection}
+ {gapAnalysis[key].start.name} {gapAnalysis[key].start.section} {gapAnalysis[key].start.subsection} + + + +
{gapAnalysis[key].start.sectionID} {gapAnalysis[key].start.description}

@@ -146,7 +157,10 @@ export const GapAnalysis = () => { trigger={ {path.end.name} {path.end.sectionID} {path.end.section}{' '} - {path.end.subsection} {path.end.description} ({GetStrength(path.score)}:{path.score}) + {path.end.subsection} {path.end.description} ({GetStrength(path.score)}:{path.score}){' '} + + + } /> @@ -182,7 +196,10 @@ export const GapAnalysis = () => { trigger={ {path.end.name} {path.end.sectionID} {path.end.section}{' '} - {path.end.subsection} {path.end.description} {GetStrength(path.score)}:{path.score}) + {path.end.subsection} {path.end.description} {GetStrength(path.score)}:{path.score}){' '} + + + } /> From 8185d6cef274d92484f1ed23fd7c96ed44f4e7ef Mon Sep 17 00:00:00 2001 From: john681611 Date: Mon, 4 Sep 2023 15:40:58 +0100 Subject: [PATCH 15/75] readme improvement --- README.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/README.md b/README.md index 6ccf6a916..6e1925678 100644 --- a/README.md +++ b/README.md @@ -65,6 +65,13 @@ To run the web application for development you can run Alternatively, you can use the dockerfile with
make docker && make docker-run
+Some features like Gap Analysis require a running Neo4j DB, which you can start with +
make docker-neo4j
+environment variables for the app to connect to the Neo4j DB (defaults): +- NEO4J_URI (localhost) +- NEO4J_USR (neo4j) +- NEO4J_PASS (password) + To run the web application for production you need gunicorn and you can run from within the cre_sync dir 
make prod-run
From 6a6ceba4ce30d37b218d2c3ccfd552a972ff8f6d Mon Sep 17 00:00:00 2001 From: john681611 Date: Mon, 4 Sep 2023 16:19:54 +0100 Subject: [PATCH 16/75] Hide table on new search --- application/frontend/src/pages/GapAnalysis/GapAnalysis.tsx | 1 + 1 file changed, 1 insertion(+) diff --git a/application/frontend/src/pages/GapAnalysis/GapAnalysis.tsx b/application/frontend/src/pages/GapAnalysis/GapAnalysis.tsx index 89c563235..246cc9d29 100644 --- a/application/frontend/src/pages/GapAnalysis/GapAnalysis.tsx +++ b/application/frontend/src/pages/GapAnalysis/GapAnalysis.tsx @@ -69,6 +69,7 @@ export const GapAnalysis = () => { }; if (!BaseStandard || !CompareStandard || BaseStandard === CompareStandard) return; + setGapAnalysis(undefined); setLoading(true); fetchData().catch(e => {setLoading(false); setError(e.response.data.message ?? e.message)}); }, [BaseStandard, CompareStandard, setGapAnalysis, setLoading, setError]); From 31b8b75b2d6de477b8c8e129ea1377105c2e1a3a Mon Sep 17 00:00:00 2001 From: john681611 Date: Mon, 4 Sep 2023 16:20:12 +0100 Subject: [PATCH 17/75] Optermise query to remove relates to --- application/database/db.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/application/database/db.py b/application/database/db.py index 030eaecb2..55f062baa 100644 --- a/application/database/db.py +++ b/application/database/db.py @@ -257,12 +257,14 @@ def gap_analysis(self, name_1, name_2): if not self.connected: return records, _, _ = self.driver.execute_query( - "MATCH" - "(BaseStandard:Node {name: $name1}), " - "(CompareStandard:Node {name: $name2}), " - "p = shortestPath((BaseStandard)-[*]-(CompareStandard)) " - "WHERE length(p) > 1 AND ALL(n in NODES(p) WHERE n:CRE or n = BaseStandard or n = CompareStandard) " - "RETURN p ", + """ + OPTIONAL MATCH (BaseStandard:Node {name: $name1}) + OPTIONAL MATCH (CompareStandard:Node {name: $name2}) + OPTIONAL MATCH p = shortestPath((BaseStandard)-[:(LINKED_TO|CONTAINS)*..20]-(CompareStandard)) + 
WITH p + WHERE length(p) > 1 AND ALL(n in NODES(p) WHERE n:CRE or n.name = $name1 or n.name = $name2) + RETURN p + """, name1=name_1, name2=name_2, database_="neo4j", From af4fd6ee05700c263a0dcc2dc8364fa8b203deb4 Mon Sep 17 00:00:00 2001 From: john681611 Date: Mon, 4 Sep 2023 16:53:41 +0100 Subject: [PATCH 18/75] Get duel running method working and show empty values --- application/database/db.py | 41 ++++++- .../src/pages/GapAnalysis/GapAnalysis.tsx | 112 +++++++++--------- application/web/web_main.py | 16 ++- 3 files changed, 106 insertions(+), 63 deletions(-) diff --git a/application/database/db.py b/application/database/db.py index 55f062baa..6f88d4c7e 100644 --- a/application/database/db.py +++ b/application/database/db.py @@ -255,8 +255,30 @@ def link_CRE_to_Node(self, CRE_id, node_id, link_type): @classmethod def gap_analysis(self, name_1, name_2): if not self.connected: - return - records, _, _ = self.driver.execute_query( + return None, None + base_standard, _, _ = self.driver.execute_query( + """ + MATCH (BaseStandard:Node {name: $name1}) + RETURN BaseStandard + """, + name1=name_1, + database_="neo4j", + ) + + path_records_all, _, _ = self.driver.execute_query( + """ + OPTIONAL MATCH (BaseStandard:Node {name: $name1}) + OPTIONAL MATCH (CompareStandard:Node {name: $name2}) + OPTIONAL MATCH p = shortestPath((BaseStandard)-[*..20]-(CompareStandard)) + WITH p + WHERE length(p) > 1 AND ALL(n in NODES(p) WHERE n:CRE or n.name = $name1 or n.name = $name2) + RETURN p + """, + name1=name_1, + name2=name_2, + database_="neo4j", + ) + path_records, _, _ = self.driver.execute_query( """ OPTIONAL MATCH (BaseStandard:Node {name: $name1}) OPTIONAL MATCH (CompareStandard:Node {name: $name2}) @@ -291,7 +313,7 @@ def format_segment(seg): "relationship": seg.type, } - def format_record(rec): + def format_path_record(rec): return { "start": { "name": rec.start_node["name"], @@ -311,8 +333,19 @@ def format_record(rec): }, "path": [format_segment(seg) for seg in 
rec.relationships], } + + def format_record(rec): + return { + "name": rec["name"], + "sectionID": rec["section_id"], + "section": rec["section"], + "subsection": rec["subsection"], + "description": rec["description"], + "id": rec["id"], + } + - return [format_record(rec["p"]) for rec in records] + return [format_record(rec["BaseStandard"]) for rec in base_standard], [format_path_record(rec["p"]) for rec in (path_records + path_records_all)] @classmethod def standards(self): diff --git a/application/frontend/src/pages/GapAnalysis/GapAnalysis.tsx b/application/frontend/src/pages/GapAnalysis/GapAnalysis.tsx index 246cc9d29..19b7f6b7a 100644 --- a/application/frontend/src/pages/GapAnalysis/GapAnalysis.tsx +++ b/application/frontend/src/pages/GapAnalysis/GapAnalysis.tsx @@ -28,7 +28,7 @@ function useQuery() { export const GapAnalysis = () => { const standardOptionsDefault = [ { key: '', text: '', value: undefined }, - + ]; const searchParams = useQuery(); const [standardOptions, setStandardOptions] = useState(standardOptionsDefault); @@ -41,10 +41,10 @@ export const GapAnalysis = () => { const { apiUrl } = useEnvironment(); const GetStrength = (score) => { - if(score < 5) return 'Strong' - if(score > 20) return 'Weak' + if (score < 5) return 'Strong' + if (score > 20) return 'Weak' return 'Average' - } + } useEffect(() => { const fetchData = async () => { @@ -56,7 +56,7 @@ export const GapAnalysis = () => { }; setLoading(true); - fetchData().catch(e => {setLoading(false); setError(e.response.data.message ?? e.message)}); + fetchData().catch(e => { setLoading(false); setError(e.response.data.message ?? e.message) }); }, [setStandardOptions, setLoading, setError]); useEffect(() => { @@ -71,7 +71,7 @@ export const GapAnalysis = () => { if (!BaseStandard || !CompareStandard || BaseStandard === CompareStandard) return; setGapAnalysis(undefined); setLoading(true); - fetchData().catch(e => {setLoading(false); setError(e.response.data.message ?? 
e.message)}); + fetchData().catch(e => { setLoading(false); setError(e.response.data.message ?? e.message) }); }, [BaseStandard, CompareStandard, setGapAnalysis, setLoading, setError]); const handleAccordionClick = (e, titleProps) => { @@ -106,16 +106,16 @@ export const GapAnalysis = () => { {gapAnalysis && ( - - - + + + )} {gapAnalysis && ( -
+
{BaseStandard} @@ -132,19 +132,19 @@ export const GapAnalysis = () => { -
+
{gapAnalysis[key].start.sectionID} {gapAnalysis[key].start.description}

- {gapAnalysis[key].paths + {Object.values(gapAnalysis[key].paths) .sort((a, b) => a.score - b.score) .slice(0, 3) .map((path) => { let segmentID = gapAnalysis[key].start.id; return ( - + { ); })} + {Object.keys(gapAnalysis[key].paths).length > 3 && ( + + + + + + Weaker Links:
+ {Object.values(gapAnalysis[key].paths) + .sort((a, b) => a.score - b.score) + .slice(3, gapAnalysis[key].paths.length) + .map((path) => { + let segmentID = gapAnalysis[key].start.id; + return ( + + - - - - - Weaker Links:
- {gapAnalysis[key].paths - .sort((a, b) => a.score - b.score) - .slice(2, gapAnalysis[key].paths.length) - .map((path) => { - let segmentID = gapAnalysis[key].start.id; - return ( - - { - const { text, nextID } = GetSegmentText(segment, segmentID); - segmentID = nextID; - return text; - }) - .join('')} - trigger={ - - {path.end.name} {path.end.sectionID} {path.end.section}{' '} - {path.end.subsection} {path.end.description} {GetStrength(path.score)}:{path.score}){' '} - - - - - } - /> -
-
- ); - })} -
-
+ hoverable + content={path.path + .map((segment) => { + const { text, nextID } = GetSegmentText(segment, segmentID); + segmentID = nextID; + return text; + }) + .join('')} + trigger={ + + {path.end.name} {path.end.sectionID} {path.end.section}{' '} + {path.end.subsection} {path.end.description} {GetStrength(path.score)}:{path.score}){' '} + + + + + } + /> +
+
+ ); + })} + + + )} + {Object.keys(gapAnalysis[key].paths).length === 0 && (No links Found)}
))} diff --git a/application/web/web_main.py b/application/web/web_main.py index 94f7abd2e..e5950c82e 100644 --- a/application/web/web_main.py +++ b/application/web/web_main.py @@ -213,17 +213,25 @@ def find_document_by_tag() -> Any: def gap_analysis() -> Any: database = db.Node_collection() standards = request.args.getlist("standard") - paths = database.gap_analysis(standards) + base_standard, paths = database.gap_analysis(standards) if paths is None: return neo4j_not_running_rejection() grouped_paths = {} + for node in base_standard: + key = node["id"] + if key not in grouped_paths: + grouped_paths[key] = {"start": node, "paths": {}} + for path in paths: key = path["start"]["id"] - if key not in grouped_paths: - grouped_paths[key] = {"start": path["start"], "paths": []} + end_key = path["end"]["id"] path["score"] = get_path_score(path) del path["start"] - grouped_paths[key]["paths"].append(path) + if end_key in grouped_paths[key]["paths"]: + if grouped_paths[key]["paths"][end_key]['score'] > path["score"]: + grouped_paths[key]["paths"][end_key] = path + else: + grouped_paths[key]["paths"][end_key] = path return jsonify(grouped_paths) From 865bef07da08e0111b907d58bd3682367b815a47 Mon Sep 17 00:00:00 2001 From: john681611 Date: Mon, 4 Sep 2023 16:58:05 +0100 Subject: [PATCH 19/75] Refactor grouping & scoring code locations --- application/database/db.py | 25 ++++++++++++++++++++++++- application/web/web_main.py | 24 +++--------------------- 2 files changed, 27 insertions(+), 22 deletions(-) diff --git a/application/database/db.py b/application/database/db.py index 6f88d4c7e..70152e60c 100644 --- a/application/database/db.py +++ b/application/database/db.py @@ -17,6 +17,8 @@ from sqlalchemy.sql.expression import desc # type: ignore import uuid +from application.utils.gap_analysis import get_path_score + from .. 
import sqla # type: ignore logging.basicConfig() @@ -1272,7 +1274,28 @@ def find_path_between_nodes( return res def gap_analysis(self, node_names: List[str]): - return self.neo_db.gap_analysis(node_names[0], node_names[1]) + if not self.neo_db.connected: + return None + base_standard, paths = self.neo_db.gap_analysis(node_names[0], node_names[1]) + if base_standard is None: + return None + grouped_paths = {} + for node in base_standard: + key = node["id"] + if key not in grouped_paths: + grouped_paths[key] = {"start": node, "paths": {}} + + for path in paths: + key = path["start"]["id"] + end_key = path["end"]["id"] + path["score"] = get_path_score(path) + del path["start"] + if end_key in grouped_paths[key]["paths"]: + if grouped_paths[key]["paths"][end_key]['score'] > path["score"]: + grouped_paths[key]["paths"][end_key] = path + else: + grouped_paths[key]["paths"][end_key] = path + return grouped_paths def standards(self): return self.neo_db.standards() diff --git a/application/web/web_main.py b/application/web/web_main.py index e5950c82e..3aac0e050 100644 --- a/application/web/web_main.py +++ b/application/web/web_main.py @@ -15,7 +15,6 @@ from application.defs import osib_defs as odefs from application.utils import spreadsheet as sheet_utils from application.utils import mdutils, redirectors -from application.utils.gap_analysis import get_path_score from application.prompt_client import prompt_client as prompt_client from enum import Enum from flask import ( @@ -213,27 +212,10 @@ def find_document_by_tag() -> Any: def gap_analysis() -> Any: database = db.Node_collection() standards = request.args.getlist("standard") - base_standard, paths = database.gap_analysis(standards) - if paths is None: + gap_analysis = database.gap_analysis(standards) + if gap_analysis is None: return neo4j_not_running_rejection() - grouped_paths = {} - for node in base_standard: - key = node["id"] - if key not in grouped_paths: - grouped_paths[key] = {"start": node, "paths": {}} - - 
for path in paths: - key = path["start"]["id"] - end_key = path["end"]["id"] - path["score"] = get_path_score(path) - del path["start"] - if end_key in grouped_paths[key]["paths"]: - if grouped_paths[key]["paths"][end_key]['score'] > path["score"]: - grouped_paths[key]["paths"][end_key] = path - else: - grouped_paths[key]["paths"][end_key] = path - - return jsonify(grouped_paths) + return jsonify(gap_analysis) @app.route("/rest/v1/standards", methods=["GET"]) @cache.cached(timeout=50) From 0f5cbb59efb904b3c305c27714d0825b2191ad43 Mon Sep 17 00:00:00 2001 From: john681611 Date: Mon, 4 Sep 2023 17:01:25 +0100 Subject: [PATCH 20/75] Add colour to strength raiting --- application/database/db.py | 29 ++--- .../src/pages/GapAnalysis/GapAnalysis.tsx | 113 ++++++++++++------ application/web/web_main.py | 17 ++- 3 files changed, 105 insertions(+), 54 deletions(-) diff --git a/application/database/db.py b/application/database/db.py index 70152e60c..3b96d7b03 100644 --- a/application/database/db.py +++ b/application/database/db.py @@ -335,31 +335,32 @@ def format_path_record(rec): }, "path": [format_segment(seg) for seg in rec.relationships], } - + def format_record(rec): return { - "name": rec["name"], - "sectionID": rec["section_id"], - "section": rec["section"], - "subsection": rec["subsection"], - "description": rec["description"], - "id": rec["id"], - } + "name": rec["name"], + "sectionID": rec["section_id"], + "section": rec["section"], + "subsection": rec["subsection"], + "description": rec["description"], + "id": rec["id"], + } + return [format_record(rec["BaseStandard"]) for rec in base_standard], [ + format_path_record(rec["p"]) for rec in (path_records + path_records_all) + ] - return [format_record(rec["BaseStandard"]) for rec in base_standard], [format_path_record(rec["p"]) for rec in (path_records + path_records_all)] - @classmethod def standards(self): if not self.connected: return records, _, _ = self.driver.execute_query( - 'MATCH (n:Node {ntype: "Standard"}) 
' - "RETURN collect(distinct n.name)", + 'MATCH (n:Node {ntype: "Standard"}) ' "RETURN collect(distinct n.name)", database_="neo4j", ) return records[0][0] + class CRE_Graph: graph: nx.Graph = None neo_db: NEO_DB = None @@ -1276,7 +1277,7 @@ def find_path_between_nodes( def gap_analysis(self, node_names: List[str]): if not self.neo_db.connected: return None - base_standard, paths = self.neo_db.gap_analysis(node_names[0], node_names[1]) + base_standard, paths = self.neo_db.gap_analysis(node_names[0], node_names[1]) if base_standard is None: return None grouped_paths = {} @@ -1291,7 +1292,7 @@ def gap_analysis(self, node_names: List[str]): path["score"] = get_path_score(path) del path["start"] if end_key in grouped_paths[key]["paths"]: - if grouped_paths[key]["paths"][end_key]['score'] > path["score"]: + if grouped_paths[key]["paths"][end_key]["score"] > path["score"]: grouped_paths[key]["paths"][end_key] = path else: grouped_paths[key]["paths"][end_key] = path diff --git a/application/frontend/src/pages/GapAnalysis/GapAnalysis.tsx b/application/frontend/src/pages/GapAnalysis/GapAnalysis.tsx index 19b7f6b7a..8a538e4d4 100644 --- a/application/frontend/src/pages/GapAnalysis/GapAnalysis.tsx +++ b/application/frontend/src/pages/GapAnalysis/GapAnalysis.tsx @@ -1,10 +1,10 @@ +import axios from 'axios'; import React, { useEffect, useState } from 'react'; +import { useLocation } from 'react-router-dom'; import { Accordion, Button, Dropdown, DropdownItemProps, Grid, Icon, Popup, Table } from 'semantic-ui-react'; -import { useLocation } from "react-router-dom"; -import { LoadingAndErrorIndicator } from '../../components/LoadingAndErrorIndicator'; +import { LoadingAndErrorIndicator } from '../../components/LoadingAndErrorIndicator'; import { useEnvironment } from '../../hooks'; -import axios from 'axios'; const GetSegmentText = (segment, segmentID) => { let textPart = segment.end; @@ -15,7 +15,9 @@ const GetSegmentText = (segment, segmentID) => { nextID = segment.start.id; 
arrow = '<-'; } - const text = `${arrow} ${segment.relationship} ${arrow} ${textPart.name} ${textPart.sectionID ?? ""} ${textPart.section ?? ""} ${textPart.subsection ?? ''} ${textPart.description ?? ''}`; + const text = `${arrow} ${segment.relationship} ${arrow} ${textPart.name} ${textPart.sectionID ?? ''} ${ + textPart.section ?? '' + } ${textPart.subsection ?? ''} ${textPart.description ?? ''}`; return { text, nextID }; }; @@ -26,14 +28,15 @@ function useQuery() { } export const GapAnalysis = () => { - const standardOptionsDefault = [ - { key: '', text: '', value: undefined }, - - ]; + const standardOptionsDefault = [{ key: '', text: '', value: undefined }]; const searchParams = useQuery(); - const [standardOptions, setStandardOptions] = useState(standardOptionsDefault); - const [BaseStandard, setBaseStandard] = useState(searchParams.get('base') ?? ""); - const [CompareStandard, setCompareStandard] = useState(searchParams.get('compare') ?? ""); + const [standardOptions, setStandardOptions] = useState( + standardOptionsDefault + ); + const [BaseStandard, setBaseStandard] = useState(searchParams.get('base') ?? ''); + const [CompareStandard, setCompareStandard] = useState( + searchParams.get('compare') ?? 
'' + ); const [gapAnalysis, setGapAnalysis] = useState(); const [activeIndex, SetActiveIndex] = useState(); const [loading, setLoading] = useState(false); @@ -41,22 +44,31 @@ export const GapAnalysis = () => { const { apiUrl } = useEnvironment(); const GetStrength = (score) => { - if (score < 5) return 'Strong' - if (score > 20) return 'Weak' - return 'Average' - } + if (score < 5) return 'Strong'; + if (score > 20) return 'Weak'; + return 'Average'; + }; + + const GetStrengthColor = (score) => { + if (score < 5) return 'Green'; + if (score > 20) return 'Red'; + return 'Orange'; + }; useEffect(() => { const fetchData = async () => { - const result = await axios.get( - `${apiUrl}/standards` - ); + const result = await axios.get(`${apiUrl}/standards`); setLoading(false); - setStandardOptions(standardOptionsDefault.concat(result.data.map(x => ({ key: x, text: x, value: x })))); + setStandardOptions( + standardOptionsDefault.concat(result.data.map((x) => ({ key: x, text: x, value: x }))) + ); }; setLoading(true); - fetchData().catch(e => { setLoading(false); setError(e.response.data.message ?? e.message) }); + fetchData().catch((e) => { + setLoading(false); + setError(e.response.data.message ?? e.message); + }); }, [setStandardOptions, setLoading, setError]); useEffect(() => { @@ -71,7 +83,10 @@ export const GapAnalysis = () => { if (!BaseStandard || !CompareStandard || BaseStandard === CompareStandard) return; setGapAnalysis(undefined); setLoading(true); - fetchData().catch(e => { setLoading(false); setError(e.response.data.message ?? e.message) }); + fetchData().catch((e) => { + setLoading(false); + setError(e.response.data.message ?? e.message); + }); }, [BaseStandard, CompareStandard, setGapAnalysis, setLoading, setError]); const handleAccordionClick = (e, titleProps) => { @@ -107,7 +122,14 @@ export const GapAnalysis = () => { {gapAnalysis && ( - @@ -115,7 +137,7 @@ export const GapAnalysis = () => { {gapAnalysis && ( -
+
{BaseStandard} @@ -126,10 +148,16 @@ export const GapAnalysis = () => { {Object.keys(gapAnalysis).map((key) => ( - +

- {gapAnalysis[key].start.name} {gapAnalysis[key].start.section} {gapAnalysis[key].start.subsection} - + + {gapAnalysis[key].start.name} {gapAnalysis[key].start.section}{' '} + {gapAnalysis[key].start.subsection} + +
@@ -157,9 +185,16 @@ export const GapAnalysis = () => { .join('')} trigger={ - {path.end.name} {path.end.sectionID} {path.end.section}{' '} - {path.end.subsection} {path.end.description} ({GetStrength(path.score)}:{path.score}){' '} - + {path.end.name} {path.end.sectionID} {path.end.section} {path.end.subsection}{' '} + {path.end.description} ( + + {GetStrength(path.score)}:{path.score} + + ){' '} + @@ -171,11 +206,14 @@ export const GapAnalysis = () => { })} {Object.keys(gapAnalysis[key].paths).length > 3 && ( - - + + - Weaker Links:
{Object.values(gapAnalysis[key].paths) .sort((a, b) => a.score - b.score) .slice(3, gapAnalysis[key].paths.length) @@ -185,7 +223,6 @@ export const GapAnalysis = () => { { @@ -197,8 +234,12 @@ export const GapAnalysis = () => { trigger={ {path.end.name} {path.end.sectionID} {path.end.section}{' '} - {path.end.subsection} {path.end.description} {GetStrength(path.score)}:{path.score}){' '} - + {path.end.subsection} {path.end.description} {GetStrength(path.score)}: + {path.score}){' '} + @@ -211,7 +252,7 @@ export const GapAnalysis = () => {
)} - {Object.keys(gapAnalysis[key].paths).length === 0 && (No links Found)} + {Object.keys(gapAnalysis[key].paths).length === 0 && No links Found} ))} diff --git a/application/web/web_main.py b/application/web/web_main.py index 3aac0e050..f65930485 100644 --- a/application/web/web_main.py +++ b/application/web/web_main.py @@ -67,7 +67,15 @@ def extend_cre_with_tag_links( def neo4j_not_running_rejection(): logger.info("Neo4j is disabled") - return jsonify({"message": "Backend services connected to this feature are not running at the moment."}), 500 + return ( + jsonify( + { + "message": "Backend services connected to this feature are not running at the moment." + } + ), + 500, + ) + @app.route("/rest/v1/id/", methods=["GET"]) @app.route("/rest/v1/name/", methods=["GET"]) @@ -213,17 +221,18 @@ def gap_analysis() -> Any: database = db.Node_collection() standards = request.args.getlist("standard") gap_analysis = database.gap_analysis(standards) - if gap_analysis is None: + if gap_analysis is None: return neo4j_not_running_rejection() return jsonify(gap_analysis) + @app.route("/rest/v1/standards", methods=["GET"]) @cache.cached(timeout=50) def standards() -> Any: database = db.Node_collection() standards = database.standards() - if standards is None: - neo4j_not_running_rejection() + if standards is None: + neo4j_not_running_rejection() return standards From 65eb93757d118a8d4f0f579f0f75f8608c4c685e Mon Sep 17 00:00:00 2001 From: john681611 Date: Thu, 7 Sep 2023 12:49:08 +0100 Subject: [PATCH 21/75] Add gap analysis tests --- application/tests/db_test.py | 306 ++++++++++++++++++----------------- 1 file changed, 159 insertions(+), 147 deletions(-) diff --git a/application/tests/db_test.py b/application/tests/db_test.py index 936e240c7..6ebcb04da 100644 --- a/application/tests/db_test.py +++ b/application/tests/db_test.py @@ -3,6 +3,7 @@ import os import tempfile import unittest +from unittest.mock import patch import uuid from copy import copy, deepcopy from pprint import 
pprint @@ -763,153 +764,6 @@ def test_get_nodes_with_pagination(self) -> None: (None, None, None), ) - def test_gap_analysis(self) -> None: - """Given - the following standards SA1, SA2, SA3 SAA1 , SB1, SD1, SDD1, SW1, SX1 - the following CREs CA, CB, CC, CD, CDD , CW, CX - the following links - CC -> CA, CB,CD - CD -> CDD - CA-> SA1, SAA1 - CB -> SB1 - CD -> SD1 - CDD -> SDD1 - CW -> SW1 - CX -> SA3, SX1 - NoCRE -> SA2 - - Then: - gap_analysis(SA) returns SA1, SA2, SA3 - gap_analysis(SA,SAA) returns SA1 <-> SAA1, SA2, SA3 - gap_analysis(SA,SDD) returns SA1 <-> SDD1, SA2, SA3 - gap_analysis(SA, SW) returns SA1,SA2,SA3, SW1 # no connection - gap_analysis(SA, SB, SD, SW) returns SA1 <->(SB1,SD1), SA2 , SW1, SA3 - gap_analysis(SA, SX) returns SA1, SA2, SA3->SX1 - - give me a single standard - give me two standards connected by same cre - give me two standards connected by cres who are children of the same cre - give me two standards connected by completely different cres - give me two standards with sections on different trees. 
- - give me two standards without connections - give me 3 or more standards - - """ - - collection = db.Node_collection() - collection.graph.graph = db.CRE_Graph.load_cre_graph(sqla.session) - - cres = { - "dbca": collection.add_cre(defs.CRE(id="1", description="CA", name="CA")), - "dbcb": collection.add_cre(defs.CRE(id="2", description="CB", name="CB")), - "dbcc": collection.add_cre(defs.CRE(id="3", description="CC", name="CC")), - "dbcd": collection.add_cre(defs.CRE(id="4", description="CD", name="CD")), - "dbcdd": collection.add_cre( - defs.CRE(id="5", description="CDD", name="CDD") - ), - "dbcw": collection.add_cre(defs.CRE(id="6", description="CW", name="CW")), - "dbcx": collection.add_cre(defs.CRE(id="7", description="CX", name="CX")), - } - def_standards = { - "sa1": defs.Standard(name="SA", section="SA1"), - "sa2": defs.Standard(name="SA", section="SA2"), - "sa3": defs.Standard(name="SA", section="SA3"), - "saa1": defs.Standard(name="SAA", section="SAA1"), - "sb1": defs.Standard(name="SB", section="SB1"), - "sd1": defs.Standard(name="SD", section="SD1"), - "sdd1": defs.Standard(name="SDD", section="SDD1"), - "sw1": defs.Standard(name="SW", section="SW1"), - "sx1": defs.Standard(name="SX", section="SX1"), - } - standards = {} - for k, s in def_standards.items(): - standards["db" + k] = collection.add_node(s) - ltype = defs.LinkTypes.LinkedTo - collection.add_link(cre=cres["dbca"], node=standards["dbsa1"]) - collection.add_link(cre=cres["dbca"], node=standards["dbsaa1"]) - collection.add_link(cre=cres["dbcb"], node=standards["dbsb1"]) - collection.add_link(cre=cres["dbcd"], node=standards["dbsd1"]) - collection.add_link(cre=cres["dbcdd"], node=standards["dbsdd1"]) - collection.add_link(cre=cres["dbcw"], node=standards["dbsw1"]) - collection.add_link(cre=cres["dbcx"], node=standards["dbsa3"]) - collection.add_link(cre=cres["dbcx"], node=standards["dbsx1"]) - - collection.add_internal_link(group=cres["dbcc"], cre=cres["dbca"]) - 
collection.add_internal_link(group=cres["dbcc"], cre=cres["dbcb"]) - collection.add_internal_link(group=cres["dbcc"], cre=cres["dbcd"]) - collection.add_internal_link(group=cres["dbcd"], cre=cres["dbcdd"]) - - expected = { - "SA": [def_standards["sa1"], def_standards["sa2"], def_standards["sa3"]], - "SA,SAA": [ - copy(def_standards["sa1"]).add_link( - defs.Link(ltype=ltype, document=def_standards["saa1"]) - ), - copy(def_standards["saa1"]).add_link( - defs.Link(ltype=ltype, document=def_standards["sa1"]) - ), - def_standards["sa2"], - def_standards["sa3"], - ], - "SAA,SA": [ - copy(def_standards["sa1"]).add_link( - defs.Link(ltype=ltype, document=def_standards["saa1"]) - ), - copy(def_standards["saa1"]).add_link( - defs.Link(ltype=ltype, document=def_standards["sa1"]) - ), - def_standards["sa2"], - def_standards["sa3"], - ], - "SA,SDD": [ - copy(def_standards["sa1"]).add_link( - defs.Link(ltype=ltype, document=def_standards["sdd1"]) - ), - copy(def_standards["sdd1"]).add_link( - defs.Link(ltype=ltype, document=def_standards["sa1"]) - ), - def_standards["sa2"], - def_standards["sa3"], - ], - "SA,SW": [ - def_standards["sa1"], - def_standards["sa2"], - def_standards["sa3"], - def_standards["sw1"], - ], - "SA,SB,SD,SW": [ - copy(def_standards["sa1"]) - .add_link(defs.Link(ltype=ltype, document=def_standards["sb1"])) - .add_link(defs.Link(ltype=ltype, document=def_standards["sd1"])), - copy(def_standards["sb1"]) - .add_link(defs.Link(ltype=ltype, document=def_standards["sa1"])) - .add_link(defs.Link(ltype=ltype, document=def_standards["sd1"])), - copy(def_standards["sd1"]) - .add_link(defs.Link(ltype=ltype, document=def_standards["sa1"])) - .add_link(defs.Link(ltype=ltype, document=def_standards["sb1"])), - def_standards["sa2"], - def_standards["sa3"], - def_standards["sw1"], - ], - "SA,SX": [ - def_standards["sa1"], - def_standards["sa2"], - copy(def_standards["sa3"]).add_link( - defs.Link(ltype=ltype, document=def_standards["sx1"]) - ), - 
copy(def_standards["sx1"]).add_link( - defs.Link(ltype=ltype, document=def_standards["sa3"]) - ), - ], - } - - self.maxDiff = None - for args, expected_vals in expected.items(): - stands = args.split(",") - res = collection.gap_analysis(stands) - self.assertCountEqual(res, expected_vals) - def test_add_internal_link(self) -> None: """test that internal links are added successfully, edge cases: @@ -1285,6 +1139,163 @@ def test_get_root_cres(self): self.maxDiff = None self.assertEqual(root_cres, [cres[0], cres[1], cres[7]]) + def test_gap_analysis_disconnected(self): + collection = db.Node_collection() + collection.neo_db.connected = False + self.assertEqual(collection.gap_analysis(["a", "b"]), None) + + @patch.object(db.NEO_DB, 'gap_analysis') + def test_gap_analysis_no_nodes(self, gap_mock): + collection = db.Node_collection() + collection.neo_db.connected = True + + gap_mock.return_value = ([], []) + self.assertEqual(collection.gap_analysis(["a", "b"]), {}) + + @patch.object(db.NEO_DB, 'gap_analysis') + def test_gap_analysis_no_links(self, gap_mock): + collection = db.Node_collection() + collection.neo_db.connected = True + + gap_mock.return_value = ([{'id': 1}], []) + self.assertEqual(collection.gap_analysis(["a", "b"]), {1: {'start': {'id': 1}, 'paths': {}}} ) + + @patch.object(db.NEO_DB, 'gap_analysis') + def test_gap_analysis_one_link(self, gap_mock): + collection = db.Node_collection() + collection.neo_db.connected = True + path = [ + { + "end": { + "id": 1, + }, + "relationship": "LINKED_TO", + "start": { + "id": "a", + }, + }, + { + "end": { + "id": 2, + }, + "relationship": "LINKED_TO", + "start": { + "id": "a" + }, + }, + ] + gap_mock.return_value = ([{'id': 1}], [{'start':{'id': 1}, 'end': {'id': 2}, 'path': path}]) + expected = {1: {'start': {'id': 1}, 'paths': { + 2: {'end': {'id': 2}, + 'path': path, + 'score': 0}} + }} + self.assertEqual(collection.gap_analysis(["a", "b"]), expected) + + @patch.object(db.NEO_DB, 'gap_analysis') + def 
test_gap_analysis_duplicate_link_path_existing_lower(self, gap_mock): + collection = db.Node_collection() + collection.neo_db.connected = True + path = [ + { + "end": { + "id": 1, + }, + "relationship": "LINKED_TO", + "start": { + "id": "a", + }, + }, + { + "end": { + "id": 2, + }, + "relationship": "LINKED_TO", + "start": { + "id": "a" + }, + }, + ] + path2 = [ + { + "end": { + "id": 1, + }, + "relationship": "LINKED_TO", + "start": { + "id": "a", + }, + }, + { + "end": { + "id": 2, + }, + "relationship": "RELATED", + "start": { + "id": "a" + }, + }, + ] + gap_mock.return_value = ([{'id': 1}], [{'start':{'id': 1}, 'end': {'id': 2}, 'path': path}, {'start':{'id': 1}, 'end': {'id': 2}, 'path': path2}]) + expected = {1: {'start': {'id': 1}, 'paths': { + 2: {'end': {'id': 2}, + 'path': path, + 'score': 0}} + }} + self.assertEqual(collection.gap_analysis(["a", "b"]), expected) + + @patch.object(db.NEO_DB, 'gap_analysis') + def test_gap_analysis_duplicate_link_path_existing_higher(self, gap_mock): + collection = db.Node_collection() + collection.neo_db.connected = True + path = [ + { + "end": { + "id": 1, + }, + "relationship": "LINKED_TO", + "start": { + "id": "a", + }, + }, + { + "end": { + "id": 2, + }, + "relationship": "LINKED_TO", + "start": { + "id": "a" + }, + }, + ] + path2 = [ + { + "end": { + "id": 1, + }, + "relationship": "LINKED_TO", + "start": { + "id": "a", + }, + }, + { + "end": { + "id": 2, + }, + "relationship": "RELATED", + "start": { + "id": "a" + }, + }, + ] + gap_mock.return_value = ([{'id': 1}], [{'start':{'id': 1}, 'end': {'id': 2}, 'path': path2}, {'start':{'id': 1}, 'end': {'id': 2}, 'path': path}]) + expected = {1: {'start': {'id': 1}, 'paths': { + 2: {'end': {'id': 2}, + 'path': path, + 'score': 0}} + }} + self.assertEqual(collection.gap_analysis(["a", "b"]), expected) + def test_get_embeddings_by_doc_type_paginated(self): """Given: a range of embedding for Nodes and a range of embeddings for CREs when called with doc_type CRE return the cre 
embeddings @@ -1449,5 +1460,6 @@ def test_get_embeddings_by_doc_type(self): self.assertEqual(tool_emb, {}) + if __name__ == "__main__": unittest.main() From c684227ee744a51377128db198d0cb747c49f2b1 Mon Sep 17 00:00:00 2001 From: john681611 Date: Thu, 7 Sep 2023 13:05:11 +0100 Subject: [PATCH 22/75] Short drop down list --- .../src/pages/GapAnalysis/GapAnalysis.tsx | 12 +- application/tests/db_test.py | 245 +++++++++--------- 2 files changed, 136 insertions(+), 121 deletions(-) diff --git a/application/frontend/src/pages/GapAnalysis/GapAnalysis.tsx b/application/frontend/src/pages/GapAnalysis/GapAnalysis.tsx index 8a538e4d4..139387693 100644 --- a/application/frontend/src/pages/GapAnalysis/GapAnalysis.tsx +++ b/application/frontend/src/pages/GapAnalysis/GapAnalysis.tsx @@ -60,7 +60,7 @@ export const GapAnalysis = () => { const result = await axios.get(`${apiUrl}/standards`); setLoading(false); setStandardOptions( - standardOptionsDefault.concat(result.data.map((x) => ({ key: x, text: x, value: x }))) + standardOptionsDefault.concat(result.data.sort().map((x) => ({ key: x, text: x, value: x }))) ); }; @@ -159,7 +159,8 @@ export const GapAnalysis = () => { target="_blank" > - + {' '} + {gapAnalysis[key].start.id}
{gapAnalysis[key].start.sectionID} {gapAnalysis[key].start.description} @@ -234,8 +235,11 @@ export const GapAnalysis = () => { trigger={ {path.end.name} {path.end.sectionID} {path.end.section}{' '} - {path.end.subsection} {path.end.description} {GetStrength(path.score)}: - {path.score}){' '} + {path.end.subsection} {path.end.description}( + + {GetStrength(path.score)}:{path.score} + + ){' '} Date: Thu, 7 Sep 2023 13:25:03 +0100 Subject: [PATCH 23/75] Styling improvements and legends --- .../src/pages/GapAnalysis/GapAnalysis.tsx | 93 ++++++++++++------- 1 file changed, 62 insertions(+), 31 deletions(-) diff --git a/application/frontend/src/pages/GapAnalysis/GapAnalysis.tsx b/application/frontend/src/pages/GapAnalysis/GapAnalysis.tsx index 139387693..833407f23 100644 --- a/application/frontend/src/pages/GapAnalysis/GapAnalysis.tsx +++ b/application/frontend/src/pages/GapAnalysis/GapAnalysis.tsx @@ -1,7 +1,18 @@ import axios from 'axios'; import React, { useEffect, useState } from 'react'; import { useLocation } from 'react-router-dom'; -import { Accordion, Button, Dropdown, DropdownItemProps, Grid, Icon, Popup, Table } from 'semantic-ui-react'; +import { + Accordion, + Button, + Container, + Dropdown, + DropdownItemProps, + Grid, + Icon, + Label, + Popup, + Table, +} from 'semantic-ui-react'; import { LoadingAndErrorIndicator } from '../../components/LoadingAndErrorIndicator'; import { useEnvironment } from '../../hooks'; @@ -96,43 +107,64 @@ export const GapAnalysis = () => { }; return ( -

+
- setBaseStandard(value?.toString())} - value={BaseStandard} - /> + - setCompareStandard(value?.toString())} - value={CompareStandard} - /> + {gapAnalysis && ( - - - + <> + + Generally: lower is better +
+ {GetStrength(0)}: Closely connected likely to have + majority overlap +
+ {GetStrength(6)}: Connected likely to have partial + overlap +
+ {GetStrength(22)}: Weakly connected likely to + have small or no overlap +
+
+ + + + )}
@@ -160,7 +192,6 @@ export const GapAnalysis = () => { >
{' '} - {gapAnalysis[key].start.id}
{gapAnalysis[key].start.sectionID} {gapAnalysis[key].start.description} From 22eda1e1e6a518c56ef12e2d8131b4dd224e021b Mon Sep 17 00:00:00 2001 From: Spyros Date: Sun, 10 Sep 2023 21:14:34 +0100 Subject: [PATCH 24/75] rm scikit version pin --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 025dbc184..f2daf8de2 100644 --- a/requirements.txt +++ b/requirements.txt @@ -23,7 +23,7 @@ playwright psycopg2-binary pygithub python_markdown_maker==1.0 -scikit_learn==1.3.0 +scikit_learn scipy==1.11.2 semver setuptools==66.1.1 From bcdee15b46fd50cc09cd8b4490a882f65a61ac8c Mon Sep 17 00:00:00 2001 From: Spyros Date: Sun, 10 Sep 2023 21:21:52 +0100 Subject: [PATCH 25/75] loosen requirement in sqlalchemy --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index f2daf8de2..811bd393e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -28,7 +28,7 @@ scipy==1.11.2 semver setuptools==66.1.1 simplify_docx==0.1.2 -SQLAlchemy==2.0.20 +SQLAlchemy compliance-trestle nose==1.3.7 numpy==1.23.0 From 6eb182a2ffc354db73d93a0f26f306b7fb99139a Mon Sep 17 00:00:00 2001 From: Spyros Date: Sun, 10 Sep 2023 22:07:02 +0100 Subject: [PATCH 26/75] more deps changes --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 811bd393e..b0c02b9ec 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,7 +6,7 @@ Flask_Caching==2.0.2 flask_compress==1.13 Flask_Cors==4.0.0 Flask_Migrate==4.0.4 -Flask-SQLAlchemy==3.0.5 +Flask-SQLAlchemy gitpython google-api-core google_auth_oauthlib From 49d17c08749f2301f08707fed1bd4b448ddc82a5 Mon Sep 17 00:00:00 2001 From: Spyros Date: Wed, 13 Sep 2023 16:48:27 +0100 Subject: [PATCH 27/75] fix deps --- requirements.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index b0c02b9ec..90b1e8043 
100644 --- a/requirements.txt +++ b/requirements.txt @@ -44,7 +44,7 @@ pathable==0.4.3 pathspec==0.9.0 pbr==5.8.0 pep517==0.8.2 -Pillow==9.1.1 +Pillow-binary pip-autoremove==0.9.1 platformdirs==2.2.0 playwright==1.33.0 @@ -53,7 +53,7 @@ prance prompt-toolkit==3.0.19 proto-plus==1.22.2 protobuf==4.23.1 -psycopg2==2.9.1 +psycopg2-binary pyasn1==0.4.8 pyasn1-modules==0.2.8 pycodestyle==2.7.0 From c241914c0e4b4d68bc41e854f59685c219414eda Mon Sep 17 00:00:00 2001 From: John Harvey <10814889+john681611@users.noreply.github.com> Date: Thu, 14 Sep 2023 12:05:02 +0100 Subject: [PATCH 28/75] Disable auto db pop (#372) * Add gap analysis tests * Short drop down list * Disable CRE_Graph auto population on startup * revert conflict mess with tests * Migrate Neo4j population to seperate function * Add CLI population command for DB * linting fixes --- application/cmd/cre_main.py | 10 ++++++++ application/database/db.py | 41 ++++++++++++++++++++++++------- application/tests/db_test.py | 2 -- application/utils/gap_analysis.py | 8 +++++- cre.py | 5 ++++ 5 files changed, 54 insertions(+), 12 deletions(-) diff --git a/application/cmd/cre_main.py b/application/cmd/cre_main.py index d0bca16d8..0c02f6bb6 100644 --- a/application/cmd/cre_main.py +++ b/application/cmd/cre_main.py @@ -426,6 +426,8 @@ def run(args: argparse.Namespace) -> None: # pragma: no cover generate_embeddings(args.cache_file) if args.owasp_proj_meta: owasp_metadata_to_cre(args.owasp_proj_meta) + if args.populate_neo4j_db: + populate_neo4j_db(args.cache_file) def db_connect(path: str): @@ -530,3 +532,11 @@ def owasp_metadata_to_cre(meta_file: str): }, """ raise NotImplementedError("someone needs to work on this") + + +def populate_neo4j_db(cache: str): + logger.info(f"Populating neo4j DB: Connecting to SQL DB") + database = db_connect(path=cache) + logger.info(f"Populating neo4j DB: Populating") + database.neo_db.populate_DB(database.session) + logger.info(f"Populating neo4j DB: Complete") diff --git 
a/application/database/db.py b/application/database/db.py index 3b96d7b03..7f79a6b3d 100644 --- a/application/database/db.py +++ b/application/database/db.py @@ -191,6 +191,34 @@ def instance(self): def __init__(sel): raise ValueError("NEO_DB is a singleton, please call instance() instead") + @classmethod + def populate_DB(self, session) -> nx.Graph: + graph = nx.DiGraph() + for il in session.query(InternalLinks).all(): + group = session.query(CRE).filter(CRE.id == il.group).first() + if not group: + logger.error(f"CRE {il.group} does not exist?") + self.add_cre(group) + + cre = session.query(CRE).filter(CRE.id == il.cre).first() + if not cre: + logger.error(f"CRE {il.cre} does not exist?") + self.add_cre(cre) + + self.link_CRE_to_CRE(il.group, il.cre, il.type) + + for lnk in session.query(Links).all(): + node = session.query(Node).filter(Node.id == lnk.node).first() + if not node: + logger.error(f"Node {lnk.node} does not exist?") + self.add_dbnode(node) + + cre = session.query(CRE).filter(CRE.id == lnk.cre).first() + self.add_cre(cre) + + self.link_CRE_to_Node(lnk.cre, lnk.node, lnk.type) + return graph + @classmethod def add_cre(self, dbcre: CRE): if not self.connected: @@ -208,6 +236,7 @@ def add_cre(self, dbcre: CRE): def add_dbnode(self, dbnode: Node): if not self.connected: return + # TODO: Add diffrent Node types self.driver.execute_query( "MERGE (n:Node {id: $nid, name: $name, section: $section, section_id: $section_id, subsection: $subsection, tags: $tags, version: $version, description: $description, ntype: $ntype})", nid=dbnode.id, @@ -363,14 +392,12 @@ def standards(self): class CRE_Graph: graph: nx.Graph = None - neo_db: NEO_DB = None __instance = None @classmethod - def instance(cls, session, neo_db: NEO_DB): + def instance(cls, session): if cls.__instance is None: cls.__instance = cls.__new__(cls) - cls.neo_db = neo_db cls.graph = cls.load_cre_graph(session) return cls.__instance @@ -386,7 +413,6 @@ def add_node(self, *args, **kwargs): @classmethod 
def add_cre(cls, dbcre: CRE, graph: nx.DiGraph) -> nx.DiGraph: if dbcre: - cls.neo_db.add_cre(dbcre) graph.add_node( f"CRE: {dbcre.id}", internal_id=dbcre.id, external_id=dbcre.external_id ) @@ -397,7 +423,6 @@ def add_cre(cls, dbcre: CRE, graph: nx.DiGraph) -> nx.DiGraph: @classmethod def add_dbnode(cls, dbnode: Node, graph: nx.DiGraph) -> nx.DiGraph: if dbnode: - cls.neo_db.add_dbnode(dbnode) # coma separated tags graph.add_node( @@ -426,7 +451,6 @@ def load_cre_graph(cls, session) -> nx.Graph: graph = cls.add_cre(dbcre=cre, graph=graph) graph.add_edge(f"CRE: {il.group}", f"CRE: {il.cre}", ltype=il.type) - cls.neo_db.link_CRE_to_CRE(il.group, il.cre, il.type) for lnk in session.query(Links).all(): node = session.query(Node).filter(Node.id == lnk.node).first() @@ -438,7 +462,6 @@ def load_cre_graph(cls, session) -> nx.Graph: graph = cls.add_cre(dbcre=cre, graph=graph) graph.add_edge(f"CRE: {lnk.cre}", f"Node: {str(lnk.node)}", ltype=lnk.type) - cls.neo_db.link_CRE_to_Node(lnk.cre, lnk.node, lnk.type) return graph @@ -449,8 +472,8 @@ class Node_collection: def __init__(self) -> None: if not os.environ.get("NO_LOAD_GRAPH"): - self.neo_db = NEO_DB.instance() - self.graph = CRE_Graph.instance(sqla.session, self.neo_db) + self.graph = CRE_Graph.instance(sqla.session) + self.neo_db = NEO_DB.instance() self.session = sqla.session def __get_external_links(self) -> List[Tuple[CRE, Node, str]]: diff --git a/application/tests/db_test.py b/application/tests/db_test.py index 3c41908f1..714b11423 100644 --- a/application/tests/db_test.py +++ b/application/tests/db_test.py @@ -1306,7 +1306,6 @@ def test_gap_analysis_duplicate_link_path_existing_higher(self, gap_mock): } self.assertEqual(collection.gap_analysis(["a", "b"]), expected) - def test_get_embeddings_by_doc_type_paginated(self): """Given: a range of embedding for Nodes and a range of embeddings for CREs when called with doc_type CRE return the cre embeddings @@ -1471,6 +1470,5 @@ def 
test_get_embeddings_by_doc_type(self): self.assertEqual(tool_emb, {}) - if __name__ == "__main__": unittest.main() diff --git a/application/utils/gap_analysis.py b/application/utils/gap_analysis.py index 47f97e830..147fef3d3 100644 --- a/application/utils/gap_analysis.py +++ b/application/utils/gap_analysis.py @@ -1,4 +1,10 @@ -PENALTIES = {"RELATED": 20, "CONTAINS_UP": 2, "CONTAINS_DOWN": 1, "LINKED_TO": 0} +PENALTIES = { + "RELATED": 20, + "CONTAINS_UP": 2, + "CONTAINS_DOWN": 1, + "LINKED_TO": 0, + "SAME": 0, +} def get_path_score(path): diff --git a/cre.py b/cre.py index 0feb76a62..9593505e6 100644 --- a/cre.py +++ b/cre.py @@ -187,6 +187,11 @@ def main() -> None: action="store_true", help="for every node, download the text pointed to by the hyperlink and generate embeddings for the content of the specific node", ) + parser.add_argument( + "--populate_neo4j_db", + action="store_true", + help="populate the neo4j db", + ) args = parser.parse_args() From dca39616cf83787a810ae8196c5002b1a5b5e735 Mon Sep 17 00:00:00 2001 From: Spyros Date: Thu, 14 Sep 2023 12:17:19 +0100 Subject: [PATCH 29/75] fix pillow --- application/prompt_client/prompt_client.py | 1 - requirements.txt | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/application/prompt_client/prompt_client.py b/application/prompt_client/prompt_client.py index f7de7bc82..86fe6ea6a 100644 --- a/application/prompt_client/prompt_client.py +++ b/application/prompt_client/prompt_client.py @@ -175,7 +175,6 @@ def generate_embeddings( ) # cls.cre_embeddings[id] = embedding - class PromptHandler: def __init__(self, database: db.Node_collection) -> None: self.ai_client = None diff --git a/requirements.txt b/requirements.txt index 90b1e8043..8be294463 100644 --- a/requirements.txt +++ b/requirements.txt @@ -44,7 +44,7 @@ pathable==0.4.3 pathspec==0.9.0 pbr==5.8.0 pep517==0.8.2 -Pillow-binary +Pillow pip-autoremove==0.9.1 platformdirs==2.2.0 playwright==1.33.0 From 6bd5c22d574a06678c806c0a6d25fe47f81f8977 
Mon Sep 17 00:00:00 2001 From: john681611 Date: Mon, 18 Sep 2023 13:26:13 +0100 Subject: [PATCH 30/75] Use python 3.11.4 for CI --- .github/workflows/e2e.yml | 3 +++ .github/workflows/test.yml | 7 ++++--- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/.github/workflows/e2e.yml b/.github/workflows/e2e.yml index 7b0cbda8c..91392fdce 100644 --- a/.github/workflows/e2e.yml +++ b/.github/workflows/e2e.yml @@ -7,6 +7,9 @@ jobs: steps: - name: Check out code uses: actions/checkout@v2 + - uses: actions/setup-python@v4 + with: + python-version: '3.11.4' - name: Install python dependencies run: sudo apt-get update && sudo apt-get install -y python3-setuptools python3-pip chromium-browser libgbm1 && make install-deps - name: Test-e2e diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 94bb1ca15..48feadc48 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -7,9 +7,10 @@ jobs: steps: - name: Check out code uses: actions/checkout@v2 + - uses: actions/setup-python@v4 + with: + python-version: '3.11.4' - name: Install python dependencies run: sudo apt-get update && sudo apt-get install -y python3-setuptools python3-pip && make install-deps - name: Test - run: make test - - name: Test-e2e - run: make e2e \ No newline at end of file + run: make test \ No newline at end of file From 953c770f6a1e00d5dc49e012fbabb07f21c84de5 Mon Sep 17 00:00:00 2001 From: john681611 Date: Thu, 14 Sep 2023 10:17:35 +0100 Subject: [PATCH 31/75] Parse cre_dep object from neo4j Node --- application/database/db.py | 78 +++++++++++++++++++++++++++++++++++++- 1 file changed, 76 insertions(+), 2 deletions(-) diff --git a/application/database/db.py b/application/database/db.py index 7f79a6b3d..85281b274 100644 --- a/application/database/db.py +++ b/application/database/db.py @@ -366,6 +366,7 @@ def format_path_record(rec): } def format_record(rec): + # self.parse_node(rec) return { "name": rec["name"], "sectionID": rec["section_id"], @@ -380,7 
+381,7 @@ def format_record(rec): ] @classmethod - def standards(self): + def standards(self) -> List[str]: if not self.connected: return records, _, _ = self.driver.execute_query( @@ -389,6 +390,79 @@ def standards(self): ) return records[0][0] + @classmethod + def parse_node(self, node: neo4j.graph.Node) -> cre_defs.Document: + print(node) + name = node["name"] + id = node["id"] if "id" in node else None + description = node["description"] if "description" in node else None + links = [self.parse_link(link) for link in node["links"]] + tags = node["tags"] + metadata = node["metadata"] + if "Node" in node.labels: + return cre_defs.Node( + name=name, + id=id, + description=description, + links=links, + tags=tags, + metadata=metadata, + hyperlink=(node["hyperlink"] if "hyperlink" in node else None), + version=(node["version"] if "version" in node else None), + ) + if "Code" in node.labels: + return cre_defs.Code( + name=name, + id=id, + description=description, + links=links, + tags=tags, + metadata=metadata, + hyperlink=(node["hyperlink"] if "hyperlink" in node else None), + version=(node["version"] if "version" in node else None), + ) + if "Standard" in node.labels: + return cre_defs.Standard( + name=name, + id=id, + description=description, + links=links, + tags=tags, + metadata=metadata, + hyperlink=(node["hyperlink"] if "hyperlink" in node else None), + version=(node["version"] if "version" in node else None), + section=node['section'], + sectionID='sectionID', + subsection=(node["subsection"] if "subsection" in node else None), + ) + if "Tool" in node.labels: + return cre_defs.Tool( + name=name, + id=id, + description=description, + links=links, + tags=tags, + metadata=metadata, + hyperlink=(node["hyperlink"] if "hyperlink" in node else None), + version=(node["version"] if "version" in node else None), + section=node['section'], + sectionID='sectionID', + subsection=(node["subsection"] if "subsection" in node else None), + ) + if "CRE" in node.labels: + return 
cre_defs.CRE( + name=name, + id=id, + description=description, + links=links, + tags=tags, + metadata=metadata, + ) + + @classmethod + def parse_link(self, link): + return cre_defs.Link(ltype=link["ltype"], tags=link["tags"]) + class CRE_Graph: graph: nx.Graph = None @@ -1321,7 +1395,7 @@ def gap_analysis(self, node_names: List[str]): grouped_paths[key]["paths"][end_key] = path return grouped_paths - def standards(self): + def standards(self) -> List[str]: return self.neo_db.standards() def text_search(self, text: str) -> List[Optional[cre_defs.Document]]: From ee42b4b5d01e5d6b5b9946a3f4420fdd42846e67 Mon Sep 17 00:00:00 2001 From: john681611 Date: Thu, 14 Sep 2023 12:18:40 +0100 Subject: [PATCH 32/75] Standardise Gap Analysis API Object --- application/database/db.py | 145 +++++++++++++----------------- application/utils/gap_analysis.py | 10 +-- 2 files changed, 65 insertions(+), 90 deletions(-) diff --git a/application/database/db.py b/application/database/db.py index 85281b274..abf75fc5c 100644 --- a/application/database/db.py +++ b/application/database/db.py @@ -224,11 +224,14 @@ def add_cre(self, dbcre: CRE): if not self.connected: return self.driver.execute_query( - "MERGE (n:CRE {id: $nid, name: $name, description: $description, external_id: $external_id})", - nid=dbcre.id, + "MERGE (n:CRE {id: $nid, name: $name, description: $description, doctype: $doctype, links: $links, metadata: $metadata, tags: $tags})", name=dbcre.name, + doctype="CRE", #dbcre.ntype, + nid=dbcre.id, description=dbcre.description, - external_id=dbcre.external_id, + links=[], #dbcre.links, + tags=dbcre.tags, + metadata="{}", #dbcre.metadata, database_="neo4j", ) @@ -236,20 +239,44 @@ def add_cre(self, dbcre: CRE): def add_dbnode(self, dbnode: Node): if not self.connected: return - # TODO: Add diffrent Node types - self.driver.execute_query( - "MERGE (n:Node {id: $nid, name: $name, section: $section, section_id: $section_id, subsection: $subsection, tags: $tags, version: $version, 
description: $description, ntype: $ntype})", - nid=dbnode.id, - name=dbnode.name, - section=dbnode.section, - section_id=dbnode.section_id, - subsection=dbnode.subsection or "", - tags=dbnode.tags, - version=dbnode.version or "", - description=dbnode.description, - ntype=dbnode.ntype, - database_="neo4j", - ) + if dbnode.ntype == "Standard": + self.driver.execute_query( + "MERGE (n:Standard {id: $nid, name: $name, section: $section, sectionID: $sectionID, subsection: $subsection, tags: $tags, version: $version, description: $description, doctype: $doctype, links: $links, metadata: $metadata, hyperlink: $hyperlink})", + name=dbnode.name, + doctype=dbnode.ntype, + nid=dbnode.id, + description=dbnode.description, + links=[], #dbnode.links, + tags=dbnode.tags, + metadata="{}", #dbnode.metadata, + hyperlink="", #dbnode.hyperlink or "", + version=dbnode.version or "", + section=dbnode.section, + sectionID=dbnode.section_id,#dbnode.sectionID, + subsection=dbnode.subsection or "", + database_="neo4j", + ) + return + if dbnode.ntype == "Tool": + self.driver.execute_query( + "MERGE (n:Tool {id: $nid, name: $name, section: $section, sectionID: $sectionID, subsection: $subsection, tags: $tags, version: $version, description: $description, doctype: $doctype, links: $links, metadata: $metadata, hyperlink: $hyperlink, tooltype: $tooltype})", + name=dbnode.name, + doctype=dbnode.ntype, + nid=dbnode.id, + description=dbnode.description, + links=[], #dbnode.links, + tags=dbnode.tags, + metadata="{}", #dbnode.metadata, + hyperlink="", #dbnode.hyperlink or "", + version=dbnode.version or "", + section=dbnode.section, + sectionID=dbnode.section_id,#dbnode.sectionID, + subsection=dbnode.subsection or "", + tooltype="", #dbnode.tooltype, + database_="neo4j", + ) + return + raise Exception(f"Unknown DB type: {dbnode.ntype}") @classmethod def link_CRE_to_CRE(self, id1, id2, link_type): @@ -272,7 +299,7 @@ def link_CRE_to_Node(self, CRE_id, node_id, link_type): if not self.connected: return 
self.driver.execute_query( - "MATCH (a:CRE), (b:Node) " + "MATCH (a:CRE), (b:Standard|Tool) " "WHERE a.id = $aID AND b.id = $bID " "CALL apoc.create.relationship(a,$relType, {},b) " "YIELD rel " @@ -289,7 +316,7 @@ def gap_analysis(self, name_1, name_2): return None, None base_standard, _, _ = self.driver.execute_query( """ - MATCH (BaseStandard:Node {name: $name1}) + MATCH (BaseStandard:Standard {name: $name1}) RETURN BaseStandard """, name1=name_1, @@ -298,8 +325,8 @@ def gap_analysis(self, name_1, name_2): path_records_all, _, _ = self.driver.execute_query( """ - OPTIONAL MATCH (BaseStandard:Node {name: $name1}) - OPTIONAL MATCH (CompareStandard:Node {name: $name2}) + OPTIONAL MATCH (BaseStandard:Standard {name: $name1}) + OPTIONAL MATCH (CompareStandard:Standard {name: $name2}) OPTIONAL MATCH p = shortestPath((BaseStandard)-[*..20]-(CompareStandard)) WITH p WHERE length(p) > 1 AND ALL(n in NODES(p) WHERE n:CRE or n.name = $name1 or n.name = $name2) @@ -311,8 +338,8 @@ def gap_analysis(self, name_1, name_2): ) path_records, _, _ = self.driver.execute_query( """ - OPTIONAL MATCH (BaseStandard:Node {name: $name1}) - OPTIONAL MATCH (CompareStandard:Node {name: $name2}) + OPTIONAL MATCH (BaseStandard:Standard {name: $name1}) + OPTIONAL MATCH (CompareStandard:Standard {name: $name2}) OPTIONAL MATCH p = shortestPath((BaseStandard)-[:(LINKED_TO|CONTAINS)*..20]-(CompareStandard)) WITH p WHERE length(p) > 1 AND ALL(n in NODES(p) WHERE n:CRE or n.name = $name1 or n.name = $name2) @@ -325,58 +352,19 @@ def gap_analysis(self, name_1, name_2): def format_segment(seg): return { - "start": { - "name": seg.start_node["name"], - "sectionID": seg.start_node["section_id"], - "section": seg.start_node["section"], - "subsection": seg.start_node["subsection"], - "description": seg.start_node["description"], - "id": seg.start_node["id"], - }, - "end": { - "name": seg.end_node["name"], - "sectionID": seg.end_node["section_id"], - "section": seg.end_node["section"], - "subsection": 
seg.end_node["subsection"], - "description": seg.end_node["description"], - "id": seg.end_node["id"], - }, + "start": self.parse_node(seg.start_node), + "end": self.parse_node(seg.end_node), "relationship": seg.type, } def format_path_record(rec): return { - "start": { - "name": rec.start_node["name"], - "sectionID": rec.start_node["section_id"], - "section": rec.start_node["section"], - "subsection": rec.start_node["subsection"], - "description": rec.start_node["description"], - "id": rec.start_node["id"], - }, - "end": { - "name": rec.end_node["name"], - "sectionID": rec.end_node["section_id"], - "section": rec.end_node["section"], - "subsection": rec.end_node["subsection"], - "description": rec.end_node["description"], - "id": rec.end_node["id"], - }, + "start": self.parse_node(rec.start_node), + "end": self.parse_node(rec.end_node), "path": [format_segment(seg) for seg in rec.relationships], } - def format_record(rec): - # self.parse_node(rec) - return { - "name": rec["name"], - "sectionID": rec["section_id"], - "section": rec["section"], - "subsection": rec["subsection"], - "description": rec["description"], - "id": rec["id"], - } - - return [format_record(rec["BaseStandard"]) for rec in base_standard], [ + return [self.parse_node(rec["BaseStandard"]) for rec in base_standard], [ format_path_record(rec["p"]) for rec in (path_records + path_records_all) ] @@ -385,31 +373,19 @@ def standards(self) -> List[str]: if not self.connected: return records, _, _ = self.driver.execute_query( - 'MATCH (n:Node {ntype: "Standard"}) ' "RETURN collect(distinct n.name)", + 'MATCH (n:Standard) ' "RETURN collect(distinct n.name)", database_="neo4j", ) return records[0][0] @classmethod def parse_node(self, node: neo4j.graph.Node) -> cre_defs.Document: - print(node) name = node["name"] id = node["id"] if "id" in node else None description = node["description"] if "description" in node else None links = [self.parse_link(link) for link in node["links"]] tags = node["tags"] metadata 
= node["metadata"] - if "Node" in node.labels: - return cre_defs.Node( - name=name, - id=id, - description=description, - links=links, - tags=tags, - metadata=metadata, - hyperlink=(node["hyperlink"] if "hyperlink" in node else None), - version=(node["version"] if "version" in node else None), - ) if "Code" in node.labels: return cre_defs.Code( name=name, @@ -458,6 +434,7 @@ def parse_node(self, node: neo4j.graph.Node) -> cre_defs.Document: tags=tags, metadata=metadata, ) + raise Exception(f"Unknown node {node.labels}") @classmethod def parse_link(self, link): @@ -1372,20 +1349,18 @@ def find_path_between_nodes( return res def gap_analysis(self, node_names: List[str]): - if not self.neo_db.connected: - return None base_standard, paths = self.neo_db.gap_analysis(node_names[0], node_names[1]) if base_standard is None: return None grouped_paths = {} for node in base_standard: - key = node["id"] + key = node.id if key not in grouped_paths: grouped_paths[key] = {"start": node, "paths": {}} for path in paths: - key = path["start"]["id"] - end_key = path["end"]["id"] + key = path["start"].id + end_key = path["end"].id path["score"] = get_path_score(path) del path["start"] if end_key in grouped_paths[key]["paths"]: diff --git a/application/utils/gap_analysis.py b/application/utils/gap_analysis.py index 147fef3d3..3484f5fc1 100644 --- a/application/utils/gap_analysis.py +++ b/application/utils/gap_analysis.py @@ -9,7 +9,7 @@ def get_path_score(path): score = 0 - previous_id = path["start"]["id"] + previous_id = path["start"].id for step in path["path"]: penalty_type = step["relationship"] @@ -21,12 +21,12 @@ def get_path_score(path): def get_relation_direction(step, previous_id): - if step["start"]["id"] == previous_id: + if step["start"].id == previous_id: return "UP" return "DOWN" def get_next_id(step, previous_id): - if step["start"]["id"] == previous_id: - return step["end"]["id"] - return step["start"]["id"] + if step["start"].id == previous_id: + return step["end"].id 
+ return step["start"].id From b4b9309c7b981b1124e69302bfa94ebd1ea27f85 Mon Sep 17 00:00:00 2001 From: john681611 Date: Thu, 14 Sep 2023 12:45:06 +0100 Subject: [PATCH 33/75] Fix bad parse field --- application/database/db.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/application/database/db.py b/application/database/db.py index abf75fc5c..c336857d1 100644 --- a/application/database/db.py +++ b/application/database/db.py @@ -408,7 +408,7 @@ def parse_node(self, node: neo4j.graph.Node) -> cre_defs.Document: hyperlink=(node["hyperlink"] if "hyperlink" in node else None), version=(node["version"] if "version" in node else None), section=node['section'], - sectionID='sectionID', + sectionID=node['sectionID'], subsection=(node["subsection"] if "subsection" in node else None), ) if "Tool" in node.labels: @@ -422,7 +422,7 @@ def parse_node(self, node: neo4j.graph.Node) -> cre_defs.Document: hyperlink=(node["hyperlink"] if "hyperlink" in node else None), version=(node["version"] if "version" in node else None), section=node['section'], - sectionID='sectionID', + sectionID=node['sectionID'], subsection=(node["subsection"] if "subsection" in node else None), ) if "CRE" in node.labels: From 88252d61039e6b9b1933b19ab86727a44c41c296 Mon Sep 17 00:00:00 2001 From: john681611 Date: Thu, 14 Sep 2023 12:45:49 +0100 Subject: [PATCH 34/75] Use Standard display format and create GA type --- .../src/pages/GapAnalysis/GapAnalysis.tsx | 21 ++++++++----------- application/frontend/src/types.ts | 16 ++++++++++++++ application/frontend/src/utils/document.ts | 4 ++-- 3 files changed, 27 insertions(+), 14 deletions(-) diff --git a/application/frontend/src/pages/GapAnalysis/GapAnalysis.tsx b/application/frontend/src/pages/GapAnalysis/GapAnalysis.tsx index 833407f23..9dc1f0f4a 100644 --- a/application/frontend/src/pages/GapAnalysis/GapAnalysis.tsx +++ b/application/frontend/src/pages/GapAnalysis/GapAnalysis.tsx @@ -14,6 +14,9 @@ import { Table, } from 
'semantic-ui-react'; +import { GapAnalysisPathStart } from '../../types'; +import { getDocumentDisplayName } from '../../utils'; + import { LoadingAndErrorIndicator } from '../../components/LoadingAndErrorIndicator'; import { useEnvironment } from '../../hooks'; @@ -48,7 +51,7 @@ export const GapAnalysis = () => { const [CompareStandard, setCompareStandard] = useState( searchParams.get('compare') ?? '' ); - const [gapAnalysis, setGapAnalysis] = useState(); + const [gapAnalysis, setGapAnalysis] = useState>(); const [activeIndex, SetActiveIndex] = useState(); const [loading, setLoading] = useState(false); const [error, setError] = useState(null); @@ -183,18 +186,14 @@ export const GapAnalysis = () => {

- {gapAnalysis[key].start.name} {gapAnalysis[key].start.section}{' '} - {gapAnalysis[key].start.subsection} + {getDocumentDisplayName(gapAnalysis[key].start, true)} - {' '} -
- {gapAnalysis[key].start.sectionID} - {gapAnalysis[key].start.description} +

@@ -217,8 +216,7 @@ export const GapAnalysis = () => { .join('')} trigger={ - {path.end.name} {path.end.sectionID} {path.end.section} {path.end.subsection}{' '} - {path.end.description} ( + {getDocumentDisplayName(path.end, true)} ( {GetStrength(path.score)}:{path.score} @@ -248,7 +246,7 @@ export const GapAnalysis = () => { {Object.values(gapAnalysis[key].paths) .sort((a, b) => a.score - b.score) - .slice(3, gapAnalysis[key].paths.length) + .slice(3, Object.keys(gapAnalysis[key].paths).length) .map((path) => { let segmentID = gapAnalysis[key].start.id; return ( @@ -265,8 +263,7 @@ export const GapAnalysis = () => { .join('')} trigger={ - {path.end.name} {path.end.sectionID} {path.end.section}{' '} - {path.end.subsection} {path.end.description}( + {getDocumentDisplayName(path.end, true)} {GetStrength(path.score)}:{path.score} diff --git a/application/frontend/src/types.ts b/application/frontend/src/types.ts index c8b7cec72..e24aa94a8 100644 --- a/application/frontend/src/types.ts +++ b/application/frontend/src/types.ts @@ -19,3 +19,19 @@ export interface LinkedDocument { document: Document; ltype: string; } + +interface GapAnalysisPathSegment { + start: Document; + end: Document; + relationship: string; +}; + +interface GapAnalysisPath { + end: Document; + path: GapAnalysisPathSegment[] +}; + +export interface GapAnalysisPathStart { + start: Document; + paths: Record; +}; diff --git a/application/frontend/src/utils/document.ts b/application/frontend/src/utils/document.ts index 07b5a784d..3ab94dedf 100644 --- a/application/frontend/src/utils/document.ts +++ b/application/frontend/src/utils/document.ts @@ -7,14 +7,14 @@ import { } from '../const'; import { Document, LinkedDocument } from '../types'; -export const getDocumentDisplayName = (document: Document) => { +export const getDocumentDisplayName = (document: Document, noID=false) => { // [document.doctype, document.id, document.name, document.section, document.subsection].filter(Boolean).join(' - '); // format: 
Standard - ASVS - V1.1 if (!document) { return ''; } return [ document.doctype, - document.id, + noID? "" : document.id, document.name, document.version, document.sectionID, From b1d96f3067604c437c216f072ec58ede8d23df34 Mon Sep 17 00:00:00 2001 From: john681611 Date: Thu, 14 Sep 2023 12:55:24 +0100 Subject: [PATCH 35/75] GA: Show all strong links by default (or min of 3) --- application/database/db.py | 34 +++++++++---------- .../src/pages/GapAnalysis/GapAnalysis.tsx | 19 ++++++----- application/frontend/src/types.ts | 8 ++--- application/frontend/src/utils/document.ts | 4 +-- 4 files changed, 34 insertions(+), 31 deletions(-) diff --git a/application/database/db.py b/application/database/db.py index c336857d1..03783a974 100644 --- a/application/database/db.py +++ b/application/database/db.py @@ -226,12 +226,12 @@ def add_cre(self, dbcre: CRE): self.driver.execute_query( "MERGE (n:CRE {id: $nid, name: $name, description: $description, doctype: $doctype, links: $links, metadata: $metadata, tags: $tags})", name=dbcre.name, - doctype="CRE", #dbcre.ntype, + doctype="CRE", # dbcre.ntype, nid=dbcre.id, description=dbcre.description, - links=[], #dbcre.links, + links=[], # dbcre.links, tags=dbcre.tags, - metadata="{}", #dbcre.metadata, + metadata="{}", # dbcre.metadata, database_="neo4j", ) @@ -246,13 +246,13 @@ def add_dbnode(self, dbnode: Node): doctype=dbnode.ntype, nid=dbnode.id, description=dbnode.description, - links=[], #dbnode.links, + links=[], # dbnode.links, tags=dbnode.tags, - metadata="{}", #dbnode.metadata, - hyperlink="", #dbnode.hyperlink or "", + metadata="{}", # dbnode.metadata, + hyperlink="", # dbnode.hyperlink or "", version=dbnode.version or "", section=dbnode.section, - sectionID=dbnode.section_id,#dbnode.sectionID, + sectionID=dbnode.section_id, # dbnode.sectionID, subsection=dbnode.subsection or "", database_="neo4j", ) @@ -264,15 +264,15 @@ def add_dbnode(self, dbnode: Node): doctype=dbnode.ntype, nid=dbnode.id, description=dbnode.description, - 
links=[], #dbnode.links, + links=[], # dbnode.links, tags=dbnode.tags, - metadata="{}", #dbnode.metadata, - hyperlink="", #dbnode.hyperlink or "", + metadata="{}", # dbnode.metadata, + hyperlink="", # dbnode.hyperlink or "", version=dbnode.version or "", section=dbnode.section, - sectionID=dbnode.section_id,#dbnode.sectionID, + sectionID=dbnode.section_id, # dbnode.sectionID, subsection=dbnode.subsection or "", - tooltype="", #dbnode.tooltype, + tooltype="", # dbnode.tooltype, database_="neo4j", ) return @@ -373,7 +373,7 @@ def standards(self) -> List[str]: if not self.connected: return records, _, _ = self.driver.execute_query( - 'MATCH (n:Standard) ' "RETURN collect(distinct n.name)", + "MATCH (n:Standard) " "RETURN collect(distinct n.name)", database_="neo4j", ) return records[0][0] @@ -407,8 +407,8 @@ def parse_node(self, node: neo4j.graph.Node) -> cre_defs.Document: metadata=metadata, hyperlink=(node["hyperlink"] if "hyperlink" in node else None), version=(node["version"] if "version" in node else None), - section=node['section'], - sectionID=node['sectionID'], + section=node["section"], + sectionID=node["sectionID"], subsection=(node["subsection"] if "subsection" in node else None), ) if "Tool" in node.labels: @@ -421,8 +421,8 @@ def parse_node(self, node: neo4j.graph.Node) -> cre_defs.Document: metadata=metadata, hyperlink=(node["hyperlink"] if "hyperlink" in node else None), version=(node["version"] if "version" in node else None), - section=node['section'], - sectionID=node['sectionID'], + section=node["section"], + sectionID=node["sectionID"], subsection=(node["subsection"] if "subsection" in node else None), ) if "CRE" in node.labels: diff --git a/application/frontend/src/pages/GapAnalysis/GapAnalysis.tsx b/application/frontend/src/pages/GapAnalysis/GapAnalysis.tsx index 9dc1f0f4a..21c81b904 100644 --- a/application/frontend/src/pages/GapAnalysis/GapAnalysis.tsx +++ b/application/frontend/src/pages/GapAnalysis/GapAnalysis.tsx @@ -14,11 +14,10 @@ import { 
Table, } from 'semantic-ui-react'; -import { GapAnalysisPathStart } from '../../types'; -import { getDocumentDisplayName } from '../../utils'; - import { LoadingAndErrorIndicator } from '../../components/LoadingAndErrorIndicator'; import { useEnvironment } from '../../hooks'; +import { GapAnalysisPathStart } from '../../types'; +import { getDocumentDisplayName } from '../../utils'; const GetSegmentText = (segment, segmentID) => { let textPart = segment.end; @@ -69,6 +68,9 @@ export const GapAnalysis = () => { return 'Orange'; }; + const GetStrongPathsCount = (paths) => + Math.max(Object.values(paths).filter((x) => GetStrength(x.score) === 'Strong').length, 3); + useEffect(() => { const fetchData = async () => { const result = await axios.get(`${apiUrl}/standards`); @@ -185,9 +187,7 @@ export const GapAnalysis = () => {

- - {getDocumentDisplayName(gapAnalysis[key].start, true)} - + {getDocumentDisplayName(gapAnalysis[key].start, true)} { {Object.values(gapAnalysis[key].paths) .sort((a, b) => a.score - b.score) - .slice(0, 3) + .slice(0, GetStrongPathsCount(gapAnalysis[key].paths)) .map((path) => { let segmentID = gapAnalysis[key].start.id; return ( @@ -246,7 +246,10 @@ export const GapAnalysis = () => { {Object.values(gapAnalysis[key].paths) .sort((a, b) => a.score - b.score) - .slice(3, Object.keys(gapAnalysis[key].paths).length) + .slice( + GetStrongPathsCount(gapAnalysis[key].paths), + Object.keys(gapAnalysis[key].paths).length + ) .map((path) => { let segmentID = gapAnalysis[key].start.id; return ( diff --git a/application/frontend/src/types.ts b/application/frontend/src/types.ts index e24aa94a8..64d73cdca 100644 --- a/application/frontend/src/types.ts +++ b/application/frontend/src/types.ts @@ -24,14 +24,14 @@ interface GapAnalysisPathSegment { start: Document; end: Document; relationship: string; -}; +} interface GapAnalysisPath { end: Document; - path: GapAnalysisPathSegment[] -}; + path: GapAnalysisPathSegment[]; +} export interface GapAnalysisPathStart { start: Document; paths: Record; -}; +} diff --git a/application/frontend/src/utils/document.ts b/application/frontend/src/utils/document.ts index 3ab94dedf..1e01d2b35 100644 --- a/application/frontend/src/utils/document.ts +++ b/application/frontend/src/utils/document.ts @@ -7,14 +7,14 @@ import { } from '../const'; import { Document, LinkedDocument } from '../types'; -export const getDocumentDisplayName = (document: Document, noID=false) => { +export const getDocumentDisplayName = (document: Document, noID = false) => { // [document.doctype, document.id, document.name, document.section, document.subsection].filter(Boolean).join(' - '); // format: Standard - ASVS - V1.1 if (!document) { return ''; } return [ document.doctype, - noID? "" : document.id, + noID ? 
'' : document.id, document.name, document.version, document.sectionID, From 8812441f0ca54d1f2737faba4272d4b65ab9dc00 Mon Sep 17 00:00:00 2001 From: john681611 Date: Thu, 14 Sep 2023 13:22:55 +0100 Subject: [PATCH 36/75] Better route display --- .../src/pages/GapAnalysis/GapAnalysis.tsx | 62 ++++++++++++------- 1 file changed, 39 insertions(+), 23 deletions(-) diff --git a/application/frontend/src/pages/GapAnalysis/GapAnalysis.tsx b/application/frontend/src/pages/GapAnalysis/GapAnalysis.tsx index 21c81b904..78d8d9aea 100644 --- a/application/frontend/src/pages/GapAnalysis/GapAnalysis.tsx +++ b/application/frontend/src/pages/GapAnalysis/GapAnalysis.tsx @@ -22,15 +22,23 @@ import { getDocumentDisplayName } from '../../utils'; const GetSegmentText = (segment, segmentID) => { let textPart = segment.end; let nextID = segment.end.id; - let arrow = '->'; + let arrow = ; if (segmentID !== segment.start.id) { textPart = segment.start; nextID = segment.start.id; - arrow = '<-'; + arrow = ; } - const text = `${arrow} ${segment.relationship} ${arrow} ${textPart.name} ${textPart.sectionID ?? ''} ${ - textPart.section ?? '' - } ${textPart.subsection ?? ''} ${textPart.description ?? ''}`; + const text = ( + <> +
+ {arrow}{' '} + + {segment.relationship.replace('_', ' ').toLowerCase()} + +
{getDocumentDisplayName(textPart, true)} {textPart.section ?? ''} {textPart.subsection ?? ''}{' '} + {textPart.description ?? ''} + + ); return { text, nextID }; }; @@ -187,7 +195,7 @@ export const GapAnalysis = () => {

- {getDocumentDisplayName(gapAnalysis[key].start, true)} + {getDocumentDisplayName(gapAnalysis[key].start, true)}{' '} { { - const { text, nextID } = GetSegmentText(segment, segmentID); - segmentID = nextID; - return text; - }) - .join('')} trigger={ {getDocumentDisplayName(path.end, true)} ( @@ -229,7 +232,16 @@ export const GapAnalysis = () => { } - /> + > + + {getDocumentDisplayName(gapAnalysis[key].start, true)} + {path.path.map((segment) => { + const { text, nextID } = GetSegmentText(segment, segmentID); + segmentID = nextID; + return text; + })} + +
); @@ -256,17 +268,12 @@ export const GapAnalysis = () => { { - const { text, nextID } = GetSegmentText(segment, segmentID); - segmentID = nextID; - return text; - }) - .join('')} trigger={ - {getDocumentDisplayName(path.end, true)} + {getDocumentDisplayName(path.end, true)} ( {GetStrength(path.score)}:{path.score} @@ -279,7 +286,16 @@ export const GapAnalysis = () => { } - /> + > + + {getDocumentDisplayName(gapAnalysis[key].start, true)} + {path.path.map((segment) => { + const { text, nextID } = GetSegmentText(segment, segmentID); + segmentID = nextID; + return text; + })} + +
); From 1ad292a6610833ac34b5c0d34ca72078aff7e6c1 Mon Sep 17 00:00:00 2001 From: john681611 Date: Thu, 14 Sep 2023 13:27:28 +0100 Subject: [PATCH 37/75] GA add support for tools --- application/database/db.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/application/database/db.py b/application/database/db.py index 03783a974..f5c899fba 100644 --- a/application/database/db.py +++ b/application/database/db.py @@ -316,7 +316,7 @@ def gap_analysis(self, name_1, name_2): return None, None base_standard, _, _ = self.driver.execute_query( """ - MATCH (BaseStandard:Standard {name: $name1}) + MATCH (BaseStandard:Standard|Tool {name: $name1}) RETURN BaseStandard """, name1=name_1, @@ -325,8 +325,8 @@ def gap_analysis(self, name_1, name_2): path_records_all, _, _ = self.driver.execute_query( """ - OPTIONAL MATCH (BaseStandard:Standard {name: $name1}) - OPTIONAL MATCH (CompareStandard:Standard {name: $name2}) + OPTIONAL MATCH (BaseStandard:Standard|Tool {name: $name1}) + OPTIONAL MATCH (CompareStandard:Standard|Tool {name: $name2}) OPTIONAL MATCH p = shortestPath((BaseStandard)-[*..20]-(CompareStandard)) WITH p WHERE length(p) > 1 AND ALL(n in NODES(p) WHERE n:CRE or n.name = $name1 or n.name = $name2) @@ -338,8 +338,8 @@ def gap_analysis(self, name_1, name_2): ) path_records, _, _ = self.driver.execute_query( """ - OPTIONAL MATCH (BaseStandard:Standard {name: $name1}) - OPTIONAL MATCH (CompareStandard:Standard {name: $name2}) + OPTIONAL MATCH (BaseStandard:Standard|Tool {name: $name1}) + OPTIONAL MATCH (CompareStandard:Standard|Tool {name: $name2}) OPTIONAL MATCH p = shortestPath((BaseStandard)-[:(LINKED_TO|CONTAINS)*..20]-(CompareStandard)) WITH p WHERE length(p) > 1 AND ALL(n in NODES(p) WHERE n:CRE or n.name = $name1 or n.name = $name2) @@ -373,7 +373,7 @@ def standards(self) -> List[str]: if not self.connected: return records, _, _ = self.driver.execute_query( - "MATCH (n:Standard) " "RETURN collect(distinct n.name)", + "MATCH 
(n:Standard|Tool) " "RETURN collect(distinct n.name)", database_="neo4j", ) return records[0][0] From b2c9ed1da21dc7eb5fb79972b792f4ce9f773056 Mon Sep 17 00:00:00 2001 From: john681611 Date: Fri, 15 Sep 2023 11:48:58 +0100 Subject: [PATCH 38/75] Fix existing tests --- application/tests/db_test.py | 136 +++++++------- application/tests/gap_analysis_test.py | 241 ++++++++++++------------- 2 files changed, 180 insertions(+), 197 deletions(-) diff --git a/application/tests/db_test.py b/application/tests/db_test.py index 714b11423..0f2193b97 100644 --- a/application/tests/db_test.py +++ b/application/tests/db_test.py @@ -1139,9 +1139,12 @@ def test_get_root_cres(self): self.maxDiff = None self.assertEqual(root_cres, [cres[0], cres[1], cres[7]]) - def test_gap_analysis_disconnected(self): + @patch.object(db.NEO_DB, "gap_analysis") + def test_gap_analysis_disconnected(self, gap_mock): collection = db.Node_collection() collection.neo_db.connected = False + gap_mock.return_value = (None, None) + self.assertEqual(collection.gap_analysis(["a", "b"]), None) @patch.object(db.NEO_DB, "gap_analysis") @@ -1157,9 +1160,10 @@ def test_gap_analysis_no_links(self, gap_mock): collection = db.Node_collection() collection.neo_db.connected = True - gap_mock.return_value = ([{"id": 1}], []) + gap_mock.return_value = ([defs.CRE(name="bob", id=1)], []) self.assertEqual( - collection.gap_analysis(["a", "b"]), {1: {"start": {"id": 1}, "paths": {}}} + collection.gap_analysis(["a", "b"]), + {1: {"start": defs.CRE(name="bob", id=1), "paths": {}}}, ) @patch.object(db.NEO_DB, "gap_analysis") @@ -1168,30 +1172,32 @@ def test_gap_analysis_one_link(self, gap_mock): collection.neo_db.connected = True path = [ { - "end": { - "id": 1, - }, + "end": defs.CRE(name="bob", id=1), "relationship": "LINKED_TO", - "start": { - "id": "a", - }, + "start": defs.CRE(name="bob", id="a"), }, { - "end": { - "id": 2, - }, + "end": defs.CRE(name="bob", id=2), "relationship": "LINKED_TO", - "start": {"id": "a"}, + 
"start": defs.CRE(name="bob", id="a"), }, ] gap_mock.return_value = ( - [{"id": 1}], - [{"start": {"id": 1}, "end": {"id": 2}, "path": path}], + [defs.CRE(name="bob", id=1)], + [ + { + "start": defs.CRE(name="bob", id=1), + "end": defs.CRE(name="bob", id=2), + "path": path, + } + ], ) expected = { 1: { - "start": {"id": 1}, - "paths": {2: {"end": {"id": 2}, "path": path, "score": 0}}, + "start": defs.CRE(name="bob", id=1), + "paths": { + 2: {"end": defs.CRE(name="bob", id=2), "path": path, "score": 0} + }, } } self.assertEqual(collection.gap_analysis(["a", "b"]), expected) @@ -1202,51 +1208,49 @@ def test_gap_analysis_duplicate_link_path_existing_lower(self, gap_mock): collection.neo_db.connected = True path = [ { - "end": { - "id": 1, - }, + "end": defs.CRE(name="bob", id=1), "relationship": "LINKED_TO", - "start": { - "id": "a", - }, + "start": defs.CRE(name="bob", id="a"), }, { - "end": { - "id": 2, - }, + "end": defs.CRE(name="bob", id=2), "relationship": "LINKED_TO", - "start": {"id": "a"}, + "start": defs.CRE(name="bob", id="a"), }, ] path2 = [ { - "end": { - "id": 1, - }, + "end": defs.CRE(name="bob", id=1), "relationship": "LINKED_TO", - "start": { - "id": "a", - }, + "start": defs.CRE(name="bob", id="a"), }, { - "end": { - "id": 2, - }, + "end": defs.CRE(name="bob", id=2), "relationship": "RELATED", - "start": {"id": "a"}, + "start": defs.CRE(name="bob", id="a"), }, ] gap_mock.return_value = ( - [{"id": 1}], + [defs.CRE(name="bob", id=1)], [ - {"start": {"id": 1}, "end": {"id": 2}, "path": path}, - {"start": {"id": 1}, "end": {"id": 2}, "path": path2}, + { + "start": defs.CRE(name="bob", id=1), + "end": defs.CRE(name="bob", id=2), + "path": path, + }, + { + "start": defs.CRE(name="bob", id=1), + "end": defs.CRE(name="bob", id=2), + "path": path2, + }, ], ) expected = { 1: { - "start": {"id": 1}, - "paths": {2: {"end": {"id": 2}, "path": path, "score": 0}}, + "start": defs.CRE(name="bob", id=1), + "paths": { + 2: {"end": defs.CRE(name="bob", id=2), "path": 
path, "score": 0} + }, } } self.assertEqual(collection.gap_analysis(["a", "b"]), expected) @@ -1257,51 +1261,49 @@ def test_gap_analysis_duplicate_link_path_existing_higher(self, gap_mock): collection.neo_db.connected = True path = [ { - "end": { - "id": 1, - }, + "end": defs.CRE(name="bob", id=1), "relationship": "LINKED_TO", - "start": { - "id": "a", - }, + "start": defs.CRE(name="bob", id="a"), }, { - "end": { - "id": 2, - }, + "end": defs.CRE(name="bob", id=2), "relationship": "LINKED_TO", - "start": {"id": "a"}, + "start": defs.CRE(name="bob", id="a"), }, ] path2 = [ { - "end": { - "id": 1, - }, + "end": defs.CRE(name="bob", id=1), "relationship": "LINKED_TO", - "start": { - "id": "a", - }, + "start": defs.CRE(name="bob", id="a"), }, { - "end": { - "id": 2, - }, + "end": defs.CRE(name="bob", id=2), "relationship": "RELATED", - "start": {"id": "a"}, + "start": defs.CRE(name="bob", id="a"), }, ] gap_mock.return_value = ( - [{"id": 1}], + [defs.CRE(name="bob", id=1)], [ - {"start": {"id": 1}, "end": {"id": 2}, "path": path2}, - {"start": {"id": 1}, "end": {"id": 2}, "path": path}, + { + "start": defs.CRE(name="bob", id=1), + "end": defs.CRE(name="bob", id=2), + "path": path2, + }, + { + "start": defs.CRE(name="bob", id=1), + "end": defs.CRE(name="bob", id=2), + "path": path, + }, ], ) expected = { 1: { - "start": {"id": 1}, - "paths": {2: {"end": {"id": 2}, "path": path, "score": 0}}, + "start": defs.CRE(name="bob", id=1), + "paths": { + 2: {"end": defs.CRE(name="bob", id=2), "path": path, "score": 0} + }, } } self.assertEqual(collection.gap_analysis(["a", "b"]), expected) diff --git a/application/tests/gap_analysis_test.py b/application/tests/gap_analysis_test.py index 396da8ee4..b4bf1a7ed 100644 --- a/application/tests/gap_analysis_test.py +++ b/application/tests/gap_analysis_test.py @@ -1,4 +1,5 @@ import unittest +from application.defs import cre_defs from application.utils.gap_analysis import ( get_path_score, @@ -16,47 +17,57 @@ def setUp(self) -> None: 
return None def test_get_relation_direction_UP(self): - step = {"start": {"id": "123"}, "end": {"id": "234"}} + step = { + "start": cre_defs.CRE(name="bob", id="123"), + "end": cre_defs.CRE(name="bob", id="234"), + } self.assertEqual(get_relation_direction(step, "123"), "UP") def test_get_relation_direction_DOWN(self): - step = {"start": {"id": "123"}, "end": {"id": "234"}} + step = { + "start": cre_defs.CRE(name="bob", id="123"), + "end": cre_defs.CRE(name="bob", id="234"), + } self.assertEqual(get_relation_direction(step, "234"), "DOWN") def test_get_next_id_start(self): - step = {"start": {"id": "123"}, "end": {"id": "234"}} + step = { + "start": cre_defs.CRE(name="bob", id="123"), + "end": cre_defs.CRE(name="bob", id="234"), + } self.assertEqual(get_next_id(step, "234"), "123") def test_get_next_id_end(self): - step = {"start": {"id": "123"}, "end": {"id": "234"}} + step = { + "start": cre_defs.CRE(name="bob", id="123"), + "end": cre_defs.CRE(name="bob", id="234"), + } self.assertEqual(get_next_id(step, "123"), "234") def test_get_path_score_direct_siblings_returns_zero(self): path = { - "start": { - "id": "029f7cd7-ef2f-4f25-b0d2-3227cde4b34b", - }, - "end": { - "id": "7d030730-14cc-4c43-8927-f2d0f5fbcf5d", - }, + "start": cre_defs.CRE( + name="bob", id="029f7cd7-ef2f-4f25-b0d2-3227cde4b34b" + ), + "end": cre_defs.CRE(name="bob", id="7d030730-14cc-4c43-8927-f2d0f5fbcf5d"), "path": [ { - "end": { - "id": "029f7cd7-ef2f-4f25-b0d2-3227cde4b34b", - }, + "end": cre_defs.CRE( + name="bob", id="029f7cd7-ef2f-4f25-b0d2-3227cde4b34b" + ), "relationship": "LINKED_TO", - "start": { - "id": "07bc9f6f-5387-4dc6-b277-0022ed76049f", - }, + "start": cre_defs.CRE( + name="bob", id="07bc9f6f-5387-4dc6-b277-0022ed76049f" + ), }, { - "end": { - "id": "7d030730-14cc-4c43-8927-f2d0f5fbcf5d", - }, + "end": cre_defs.CRE( + name="bob", id="7d030730-14cc-4c43-8927-f2d0f5fbcf5d" + ), "relationship": "LINKED_TO", - "start": { - "id": "e2ac59b2-c1d8-4525-a6b3-155d480aecc9", - }, + 
"start": cre_defs.CRE( + name="bob", id="e2ac59b2-c1d8-4525-a6b3-155d480aecc9" + ), }, ], } @@ -64,39 +75,33 @@ def test_get_path_score_direct_siblings_returns_zero(self): def test_get_path_score_one_up_returns_one_up_penaltiy(self): path = { - "start": { - "id": "029f7cd7-ef2f-4f25-b0d2-3227cde4b34b", - }, - "end": { - "id": "7d030730-14cc-4c43-8927-f2d0f5fbcf5d", - }, + "start": cre_defs.CRE( + name="bob", id="029f7cd7-ef2f-4f25-b0d2-3227cde4b34b" + ), + "end": cre_defs.CRE(name="bob", id="7d030730-14cc-4c43-8927-f2d0f5fbcf5d"), "path": [ { - "end": { - "id": "029f7cd7-ef2f-4f25-b0d2-3227cde4b34b", - }, + "end": cre_defs.CRE( + name="bob", id="029f7cd7-ef2f-4f25-b0d2-3227cde4b34b" + ), "relationship": "LINKED_TO", - "start": { - "id": "07bc9f6f-5387-4dc6-b277-0022ed76049f", - }, + "start": cre_defs.CRE( + name="bob", id="07bc9f6f-5387-4dc6-b277-0022ed76049f" + ), }, { - "end": { - "id": "123", - }, + "end": cre_defs.CRE(name="bob", id="123"), "relationship": "CONTAINS", - "start": { - "id": "07bc9f6f-5387-4dc6-b277-0022ed76049f", - }, + "start": cre_defs.CRE( + name="bob", id="07bc9f6f-5387-4dc6-b277-0022ed76049f" + ), }, { - "end": { - "id": "7d030730-14cc-4c43-8927-f2d0f5fbcf5d", - }, + "end": cre_defs.CRE( + name="bob", id="7d030730-14cc-4c43-8927-f2d0f5fbcf5d" + ), "relationship": "LINKED_TO", - "start": { - "id": "123", - }, + "start": cre_defs.CRE(name="bob", id="123"), }, ], } @@ -104,39 +109,33 @@ def test_get_path_score_one_up_returns_one_up_penaltiy(self): def test_get_path_score_one_down_one_returns_one_down_penaltiy(self): path = { - "start": { - "id": "029f7cd7-ef2f-4f25-b0d2-3227cde4b34b", - }, - "end": { - "id": "7d030730-14cc-4c43-8927-f2d0f5fbcf5d", - }, + "start": cre_defs.CRE( + name="bob", id="029f7cd7-ef2f-4f25-b0d2-3227cde4b34b" + ), + "end": cre_defs.CRE(name="bob", id="7d030730-14cc-4c43-8927-f2d0f5fbcf5d"), "path": [ { - "end": { - "id": "029f7cd7-ef2f-4f25-b0d2-3227cde4b34b", - }, + "end": cre_defs.CRE( + name="bob", 
id="029f7cd7-ef2f-4f25-b0d2-3227cde4b34b" + ), "relationship": "LINKED_TO", - "start": { - "id": "07bc9f6f-5387-4dc6-b277-0022ed76049f", - }, + "start": cre_defs.CRE( + name="bob", id="07bc9f6f-5387-4dc6-b277-0022ed76049f" + ), }, { - "end": { - "id": "07bc9f6f-5387-4dc6-b277-0022ed76049f", - }, + "end": cre_defs.CRE( + name="bob", id="07bc9f6f-5387-4dc6-b277-0022ed76049f" + ), "relationship": "CONTAINS", - "start": { - "id": "123", - }, + "start": cre_defs.CRE(name="bob", id="123"), }, { - "end": { - "id": "7d030730-14cc-4c43-8927-f2d0f5fbcf5d", - }, + "end": cre_defs.CRE( + name="bob", id="7d030730-14cc-4c43-8927-f2d0f5fbcf5d" + ), "relationship": "LINKED_TO", - "start": { - "id": "123", - }, + "start": cre_defs.CRE(name="bob", id="123"), }, ], } @@ -144,39 +143,33 @@ def test_get_path_score_one_down_one_returns_one_down_penaltiy(self): def test_get_path_score_related_returns_related_penalty(self): path = { - "start": { - "id": "029f7cd7-ef2f-4f25-b0d2-3227cde4b34b", - }, - "end": { - "id": "7d030730-14cc-4c43-8927-f2d0f5fbcf5d", - }, + "start": cre_defs.CRE( + name="bob", id="029f7cd7-ef2f-4f25-b0d2-3227cde4b34b" + ), + "end": cre_defs.CRE(name="bob", id="7d030730-14cc-4c43-8927-f2d0f5fbcf5d"), "path": [ { - "end": { - "id": "029f7cd7-ef2f-4f25-b0d2-3227cde4b34b", - }, + "end": cre_defs.CRE( + name="bob", id="029f7cd7-ef2f-4f25-b0d2-3227cde4b34b" + ), "relationship": "LINKED_TO", - "start": { - "id": "07bc9f6f-5387-4dc6-b277-0022ed76049f", - }, + "start": cre_defs.CRE( + name="bob", id="07bc9f6f-5387-4dc6-b277-0022ed76049f" + ), }, { - "end": { - "id": "07bc9f6f-5387-4dc6-b277-0022ed76049f", - }, + "end": cre_defs.CRE( + name="bob", id="07bc9f6f-5387-4dc6-b277-0022ed76049f" + ), "relationship": "RELATED", - "start": { - "id": "123", - }, + "start": cre_defs.CRE(name="bob", id="123"), }, { - "end": { - "id": "7d030730-14cc-4c43-8927-f2d0f5fbcf5d", - }, + "end": cre_defs.CRE( + name="bob", id="7d030730-14cc-4c43-8927-f2d0f5fbcf5d" + ), "relationship": "LINKED_TO", 
- "start": { - "id": "123", - }, + "start": cre_defs.CRE(name="bob", id="123"), }, ], } @@ -184,57 +177,45 @@ def test_get_path_score_related_returns_related_penalty(self): def test_get_path_score_one_of_each_returns_penalty(self): path = { - "start": { - "id": "029f7cd7-ef2f-4f25-b0d2-3227cde4b34b", - }, - "end": { - "id": "7d030730-14cc-4c43-8927-f2d0f5fbcf5d", - }, + "start": cre_defs.CRE( + name="bob", id="029f7cd7-ef2f-4f25-b0d2-3227cde4b34b" + ), + "end": cre_defs.CRE(name="bob", id="7d030730-14cc-4c43-8927-f2d0f5fbcf5d"), "path": [ { - "end": { - "id": "029f7cd7-ef2f-4f25-b0d2-3227cde4b34b", - }, + "end": cre_defs.CRE( + name="bob", id="029f7cd7-ef2f-4f25-b0d2-3227cde4b34b" + ), "relationship": "LINKED_TO", - "start": { - "id": "07bc9f6f-5387-4dc6-b277-0022ed76049f", - }, + "start": cre_defs.CRE( + name="bob", id="07bc9f6f-5387-4dc6-b277-0022ed76049f" + ), }, { - "end": { - "id": "07bc9f6f-5387-4dc6-b277-0022ed76049f", - }, + "end": cre_defs.CRE( + name="bob", id="07bc9f6f-5387-4dc6-b277-0022ed76049f" + ), "relationship": "CONTAINS", - "start": { - "id": "123", - }, + "start": cre_defs.CRE(name="bob", id="123"), }, { - "end": { - "id": "456", - }, + "end": cre_defs.CRE(name="bob", id="456"), "relationship": "RELATED", - "start": { - "id": "123", - }, + "start": cre_defs.CRE(name="bob", id="123"), }, { - "end": { - "id": "7d030730-14cc-4c43-8927-f2d0f5fbcf5d", - }, + "end": cre_defs.CRE( + name="bob", id="7d030730-14cc-4c43-8927-f2d0f5fbcf5d" + ), "relationship": "CONTAINS", - "start": { - "id": "456", - }, + "start": cre_defs.CRE(name="bob", id="456"), }, { - "end": { - "id": "7d030730-14cc-4c43-8927-f2d0f5fbcf5d", - }, + "end": cre_defs.CRE( + name="bob", id="7d030730-14cc-4c43-8927-f2d0f5fbcf5d" + ), "relationship": "LINKED_TO", - "start": { - "id": "456", - }, + "start": cre_defs.CRE(name="bob", id="456"), }, ], } From fcfbbb43d7f438fc0f69eeece202495666724db9 Mon Sep 17 00:00:00 2001 From: john681611 Date: Fri, 15 Sep 2023 14:50:02 +0100 Subject: [PATCH 
39/75] Add parsing tests --- application/database/db.py | 46 +++++++---- application/tests/db_test.py | 155 +++++++++++++++++++++++++++++++++++ 2 files changed, 185 insertions(+), 16 deletions(-) diff --git a/application/database/db.py b/application/database/db.py index f5c899fba..f44c9ed32 100644 --- a/application/database/db.py +++ b/application/database/db.py @@ -276,6 +276,20 @@ def add_dbnode(self, dbnode: Node): database_="neo4j", ) return + if dbnode.ntype == "Code": + self.driver.execute_query( + "MERGE (n:Code {id: $nid, name: $name, section: $section, sectionID: $sectionID, subsection: $subsection, tags: $tags, version: $version, description: $description, doctype: $doctype, links: $links, metadata: $metadata, hyperlink: $hyperlink})", + name=dbnode.name, + doctype=dbnode.ntype, + nid=dbnode.id, + description=dbnode.description, + links=[], # dbnode.links, + tags=dbnode.tags, + metadata="{}", # dbnode.metadata, + hyperlink="", # dbnode.hyperlink or "", + version=dbnode.version or "", + ) + return raise Exception(f"Unknown DB type: {dbnode.ntype}") @classmethod @@ -383,18 +397,18 @@ def parse_node(self, node: neo4j.graph.Node) -> cre_defs.Document: name = node["name"] id = node["id"] if "id" in node else None description = node["description"] if "description" in node else None - links = [self.parse_link(link) for link in node["links"]] + # links = [self.parse_link(link) for link in node["links"]] tags = node["tags"] - metadata = node["metadata"] + # metadata = node["metadata"] if "Code" in node.labels: return cre_defs.Code( name=name, id=id, description=description, - links=links, + # links=links, tags=tags, - metadata=metadata, - hyperlink=(node["hyperlink"] if "hyperlink" in node else None), + # metadata=metadata, + # hyperlink=(node["hyperlink"] if "hyperlink" in node else None), version=(node["version"] if "version" in node else None), ) if "Standard" in node.labels: @@ -402,10 +416,10 @@ def parse_node(self, node: neo4j.graph.Node) -> 
cre_defs.Document: name=name, id=id, description=description, - links=links, + # links=links, tags=tags, - metadata=metadata, - hyperlink=(node["hyperlink"] if "hyperlink" in node else None), + # metadata=metadata, + # hyperlink=(node["hyperlink"] if "hyperlink" in node else None), version=(node["version"] if "version" in node else None), section=node["section"], sectionID=node["sectionID"], @@ -416,10 +430,10 @@ def parse_node(self, node: neo4j.graph.Node) -> cre_defs.Document: name=name, id=id, description=description, - links=links, + # links=links, tags=tags, - metadata=metadata, - hyperlink=(node["hyperlink"] if "hyperlink" in node else None), + # metadata=metadata, + # hyperlink=(node["hyperlink"] if "hyperlink" in node else None), version=(node["version"] if "version" in node else None), section=node["section"], sectionID=node["sectionID"], @@ -430,15 +444,15 @@ def parse_node(self, node: neo4j.graph.Node) -> cre_defs.Document: name=name, id=id, description=description, - links=links, + # links=links, tags=tags, - metadata=metadata, + # metadata=metadata, ) raise Exception(f"Unknown node {node.labels}") - @classmethod - def parse_link(self, link): - return cre_defs.Link(ltype=link["ltype"], tags=link["tags"]) + # @classmethod + # def parse_link(self, link): + # return cre_defs.Link(ltype=link["ltype"], tags=link["tags"]) class CRE_Graph: diff --git a/application/tests/db_test.py b/application/tests/db_test.py index 0f2193b97..023b0a8dd 100644 --- a/application/tests/db_test.py +++ b/application/tests/db_test.py @@ -9,6 +9,7 @@ from pprint import pprint from pydoc import doc from typing import Any, Dict, List, Union +import neo4j import yaml from application import create_app, sqla # type: ignore @@ -1308,6 +1309,160 @@ def test_gap_analysis_duplicate_link_path_existing_higher(self, gap_mock): } self.assertEqual(collection.gap_analysis(["a", "b"]), expected) + def test_neo_db_parse_node_code(self): + collection = db.Node_collection() + name = "name" + id = 
"id" + description = "description" + tags = "tags" + version = "version" + expected = defs.Code( + name=name, + id=id, + description=description, + tags=tags, + version=version, + ) + graph_node = neo4j.graph.Node( + None, + "123", + "id", + n_labels=["Code"], + properties={ + "name": name, + "id": id, + "description": description, + "tags": tags, + "version": version, + }, + ) + self.assertEqual(collection.neo_db.parse_node(graph_node), expected) + + def test_neo_db_parse_node_standard(self): + collection = db.Node_collection() + name = "name" + id = "id" + description = "description" + tags = "tags" + version = "version" + section = "section" + sectionID = "sectionID" + subsection = "subsection" + expected = defs.Standard( + name=name, + id=id, + description=description, + tags=tags, + version=version, + section=section, + sectionID=sectionID, + subsection=subsection, + ) + graph_node = neo4j.graph.Node( + None, + "123", + "id", + n_labels=["Standard"], + properties={ + "name": name, + "id": id, + "description": description, + "tags": tags, + "version": version, + "section": section, + "sectionID": sectionID, + "subsection": subsection, + }, + ) + self.assertEqual(collection.neo_db.parse_node(graph_node), expected) + + def test_neo_db_parse_node_tool(self): + collection = db.Node_collection() + name = "name" + id = "id" + description = "description" + tags = "tags" + version = "version" + section = "section" + sectionID = "sectionID" + subsection = "subsection" + expected = defs.Tool( + name=name, + id=id, + description=description, + tags=tags, + version=version, + section=section, + sectionID=sectionID, + subsection=subsection, + ) + graph_node = neo4j.graph.Node( + None, + "123", + "id", + n_labels=["Tool"], + properties={ + "name": name, + "id": id, + "description": description, + "tags": tags, + "version": version, + "section": section, + "sectionID": sectionID, + "subsection": subsection, + }, + ) + self.assertEqual(collection.neo_db.parse_node(graph_node), 
expected) + + def test_neo_db_parse_node_cre(self): + collection = db.Node_collection() + name = "name" + id = "id" + description = "description" + tags = "tags" + expected = defs.CRE( + name=name, + id=id, + description=description, + tags=tags, + ) + graph_node = neo4j.graph.Node( + None, + "123", + "id", + n_labels=["CRE"], + properties={ + "name": name, + "id": id, + "description": description, + "tags": tags, + }, + ) + self.assertEqual(collection.neo_db.parse_node(graph_node), expected) + + def test_neo_db_parse_node_unknown(self): + collection = db.Node_collection() + name = "name" + id = "id" + description = "description" + tags = "tags" + graph_node = neo4j.graph.Node( + None, + "123", + "id", + n_labels=["ABC"], + properties={ + "name": name, + "id": id, + "description": description, + "tags": tags, + }, + ) + with self.assertRaises(Exception) as cm: + collection.neo_db.parse_node(graph_node) + + self.assertEqual(str(cm.exception), "Unknown node frozenset({'ABC'})") + def test_get_embeddings_by_doc_type_paginated(self): """Given: a range of embedding for Nodes and a range of embeddings for CREs when called with doc_type CRE return the cre embeddings From dea87ed3264aee84dd7365c587623e0897bd45ea Mon Sep 17 00:00:00 2001 From: john681611 Date: Fri, 15 Sep 2023 15:49:33 +0100 Subject: [PATCH 40/75] make parse_node static --- application/database/db.py | 14 +++++++------- application/tests/db_test.py | 15 +++++---------- 2 files changed, 12 insertions(+), 17 deletions(-) diff --git a/application/database/db.py b/application/database/db.py index f44c9ed32..f38c25a80 100644 --- a/application/database/db.py +++ b/application/database/db.py @@ -366,19 +366,19 @@ def gap_analysis(self, name_1, name_2): def format_segment(seg): return { - "start": self.parse_node(seg.start_node), - "end": self.parse_node(seg.end_node), + "start": NEO_DB.parse_node(seg.start_node), + "end": NEO_DB.parse_node(seg.end_node), "relationship": seg.type, } def format_path_record(rec): 
return { - "start": self.parse_node(rec.start_node), - "end": self.parse_node(rec.end_node), + "start": NEO_DB.parse_node(rec.start_node), + "end": NEO_DB.parse_node(rec.end_node), "path": [format_segment(seg) for seg in rec.relationships], } - return [self.parse_node(rec["BaseStandard"]) for rec in base_standard], [ + return [NEO_DB.parse_node(rec["BaseStandard"]) for rec in base_standard], [ format_path_record(rec["p"]) for rec in (path_records + path_records_all) ] @@ -392,8 +392,8 @@ def standards(self) -> List[str]: ) return records[0][0] - @classmethod - def parse_node(self, node: neo4j.graph.Node) -> cre_defs.Document: + @staticmethod + def parse_node(node: neo4j.graph.Node) -> cre_defs.Document: name = node["name"] id = node["id"] if "id" in node else None description = node["description"] if "description" in node else None diff --git a/application/tests/db_test.py b/application/tests/db_test.py index 023b0a8dd..3ebe25712 100644 --- a/application/tests/db_test.py +++ b/application/tests/db_test.py @@ -1310,7 +1310,6 @@ def test_gap_analysis_duplicate_link_path_existing_higher(self, gap_mock): self.assertEqual(collection.gap_analysis(["a", "b"]), expected) def test_neo_db_parse_node_code(self): - collection = db.Node_collection() name = "name" id = "id" description = "description" @@ -1336,10 +1335,9 @@ def test_neo_db_parse_node_code(self): "version": version, }, ) - self.assertEqual(collection.neo_db.parse_node(graph_node), expected) + self.assertEqual(db.NEO_DB.parse_node(graph_node), expected) def test_neo_db_parse_node_standard(self): - collection = db.Node_collection() name = "name" id = "id" description = "description" @@ -1374,10 +1372,9 @@ def test_neo_db_parse_node_standard(self): "subsection": subsection, }, ) - self.assertEqual(collection.neo_db.parse_node(graph_node), expected) + self.assertEqual(db.NEO_DB.parse_node(graph_node), expected) def test_neo_db_parse_node_tool(self): - collection = db.Node_collection() name = "name" id = "id" 
description = "description" @@ -1412,10 +1409,9 @@ def test_neo_db_parse_node_tool(self): "subsection": subsection, }, ) - self.assertEqual(collection.neo_db.parse_node(graph_node), expected) + self.assertEqual(db.NEO_DB.parse_node(graph_node), expected) def test_neo_db_parse_node_cre(self): - collection = db.Node_collection() name = "name" id = "id" description = "description" @@ -1438,10 +1434,9 @@ def test_neo_db_parse_node_cre(self): "tags": tags, }, ) - self.assertEqual(collection.neo_db.parse_node(graph_node), expected) + self.assertEqual(db.NEO_DB.parse_node(graph_node), expected) def test_neo_db_parse_node_unknown(self): - collection = db.Node_collection() name = "name" id = "id" description = "description" @@ -1459,7 +1454,7 @@ def test_neo_db_parse_node_unknown(self): }, ) with self.assertRaises(Exception) as cm: - collection.neo_db.parse_node(graph_node) + db.NEO_DB.parse_node(graph_node) self.assertEqual(str(cm.exception), "Unknown node frozenset({'ABC'})") From 32ae469684ccd3c4aee3b865f40de4a2dfc03db3 Mon Sep 17 00:00:00 2001 From: john681611 Date: Fri, 15 Sep 2023 15:54:37 +0100 Subject: [PATCH 41/75] Use Enum types --- .github/workflows/test.yml | 2 +- application/database/db.py | 8 ++++---- application/prompt_client/prompt_client.py | 1 + application/tests/db_test.py | 8 ++++---- requirements.txt | 5 +++-- 5 files changed, 13 insertions(+), 11 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 48feadc48..65d07220c 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -13,4 +13,4 @@ jobs: - name: Install python dependencies run: sudo apt-get update && sudo apt-get install -y python3-setuptools python3-pip && make install-deps - name: Test - run: make test \ No newline at end of file + run: make test diff --git a/application/database/db.py b/application/database/db.py index f38c25a80..ba495a62c 100644 --- a/application/database/db.py +++ b/application/database/db.py @@ -400,7 +400,7 @@ def 
parse_node(node: neo4j.graph.Node) -> cre_defs.Document: # links = [self.parse_link(link) for link in node["links"]] tags = node["tags"] # metadata = node["metadata"] - if "Code" in node.labels: + if cre_defs.Credoctypes.Code.value in node.labels: return cre_defs.Code( name=name, id=id, @@ -411,7 +411,7 @@ def parse_node(node: neo4j.graph.Node) -> cre_defs.Document: # hyperlink=(node["hyperlink"] if "hyperlink" in node else None), version=(node["version"] if "version" in node else None), ) - if "Standard" in node.labels: + if cre_defs.Credoctypes.Standard.value in node.labels: return cre_defs.Standard( name=name, id=id, @@ -425,7 +425,7 @@ def parse_node(node: neo4j.graph.Node) -> cre_defs.Document: sectionID=node["sectionID"], subsection=(node["subsection"] if "subsection" in node else None), ) - if "Tool" in node.labels: + if cre_defs.Credoctypes.Tool.value in node.labels: return cre_defs.Tool( name=name, id=id, @@ -439,7 +439,7 @@ def parse_node(node: neo4j.graph.Node) -> cre_defs.Document: sectionID=node["sectionID"], subsection=(node["subsection"] if "subsection" in node else None), ) - if "CRE" in node.labels: + if cre_defs.Credoctypes.CRE.value in node.labels: return cre_defs.CRE( name=name, id=id, diff --git a/application/prompt_client/prompt_client.py b/application/prompt_client/prompt_client.py index 86fe6ea6a..f7de7bc82 100644 --- a/application/prompt_client/prompt_client.py +++ b/application/prompt_client/prompt_client.py @@ -175,6 +175,7 @@ def generate_embeddings( ) # cls.cre_embeddings[id] = embedding + class PromptHandler: def __init__(self, database: db.Node_collection) -> None: self.ai_client = None diff --git a/application/tests/db_test.py b/application/tests/db_test.py index 3ebe25712..d79671bc2 100644 --- a/application/tests/db_test.py +++ b/application/tests/db_test.py @@ -1326,7 +1326,7 @@ def test_neo_db_parse_node_code(self): None, "123", "id", - n_labels=["Code"], + n_labels=[defs.Credoctypes.Code.value], properties={ "name": name, "id": 
id, @@ -1360,7 +1360,7 @@ def test_neo_db_parse_node_standard(self): None, "123", "id", - n_labels=["Standard"], + n_labels=[defs.Credoctypes.Standard.value], properties={ "name": name, "id": id, @@ -1397,7 +1397,7 @@ def test_neo_db_parse_node_tool(self): None, "123", "id", - n_labels=["Tool"], + n_labels=[defs.Credoctypes.Tool.value], properties={ "name": name, "id": id, @@ -1426,7 +1426,7 @@ def test_neo_db_parse_node_cre(self): None, "123", "id", - n_labels=["CRE"], + n_labels=[defs.Credoctypes.CRE.value], properties={ "name": name, "id": id, diff --git a/requirements.txt b/requirements.txt index 8be294463..6659a57bf 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,6 +2,7 @@ compliance-trestle coverage dacite docx==0.2.4 +Flask==2.3.2 Flask_Caching==2.0.2 flask_compress==1.13 Flask_Cors==4.0.0 @@ -26,13 +27,13 @@ python_markdown_maker==1.0 scikit_learn scipy==1.11.2 semver -setuptools==66.1.1 +setuptools==68.2.2 simplify_docx==0.1.2 SQLAlchemy compliance-trestle nose==1.3.7 numpy==1.23.0 -neo4j==5.11.0 +neo4j openapi-schema-validator==0.3.4 openapi-spec-validator==0.5.1 openpyxl==3.1.0 From 873fa0be70f2b31ac8f4659ddf3bfb55fabd8a5b Mon Sep 17 00:00:00 2001 From: john681611 Date: Thu, 14 Sep 2023 13:22:55 +0100 Subject: [PATCH 42/75] Better route display --- application/frontend/src/pages/GapAnalysis/GapAnalysis.tsx | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/application/frontend/src/pages/GapAnalysis/GapAnalysis.tsx b/application/frontend/src/pages/GapAnalysis/GapAnalysis.tsx index 78d8d9aea..fee97db9d 100644 --- a/application/frontend/src/pages/GapAnalysis/GapAnalysis.tsx +++ b/application/frontend/src/pages/GapAnalysis/GapAnalysis.tsx @@ -31,10 +31,7 @@ const GetSegmentText = (segment, segmentID) => { const text = ( <>
- {arrow}{' '} - - {segment.relationship.replace('_', ' ').toLowerCase()} - + {arrow} {segment.relationship.replace("_", " ").toLowerCase()}
{getDocumentDisplayName(textPart, true)} {textPart.section ?? ''} {textPart.subsection ?? ''}{' '} {textPart.description ?? ''} From bd85837c5ca9ae6f74d2284cdd0a22742c9811e8 Mon Sep 17 00:00:00 2001 From: john681611 Date: Thu, 14 Sep 2023 13:23:16 +0100 Subject: [PATCH 43/75] linting --- application/frontend/src/pages/GapAnalysis/GapAnalysis.tsx | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/application/frontend/src/pages/GapAnalysis/GapAnalysis.tsx b/application/frontend/src/pages/GapAnalysis/GapAnalysis.tsx index fee97db9d..78d8d9aea 100644 --- a/application/frontend/src/pages/GapAnalysis/GapAnalysis.tsx +++ b/application/frontend/src/pages/GapAnalysis/GapAnalysis.tsx @@ -31,7 +31,10 @@ const GetSegmentText = (segment, segmentID) => { const text = ( <>
- {arrow} {segment.relationship.replace("_", " ").toLowerCase()} + {arrow}{' '} + + {segment.relationship.replace('_', ' ').toLowerCase()} +
{getDocumentDisplayName(textPart, true)} {textPart.section ?? ''} {textPart.subsection ?? ''}{' '} {textPart.description ?? ''} From 58633ae1599b420cb961b3f206023d7f9d23dba1 Mon Sep 17 00:00:00 2001 From: john681611 Date: Fri, 15 Sep 2023 15:38:29 +0100 Subject: [PATCH 44/75] Minimal GA styling --- .../src/pages/GapAnalysis/GapAnalysis.tsx | 323 ++++++++---------- 1 file changed, 139 insertions(+), 184 deletions(-) diff --git a/application/frontend/src/pages/GapAnalysis/GapAnalysis.tsx b/application/frontend/src/pages/GapAnalysis/GapAnalysis.tsx index 78d8d9aea..d145fed18 100644 --- a/application/frontend/src/pages/GapAnalysis/GapAnalysis.tsx +++ b/application/frontend/src/pages/GapAnalysis/GapAnalysis.tsx @@ -48,6 +48,70 @@ function useQuery() { return React.useMemo(() => new URLSearchParams(search), [search]); } +const GetStrength = (score) => { + if (score < 5) return 'Strong'; + if (score > 20) return 'Weak'; + return 'Average'; +}; + +const GetStrengthColor = (score) => { + if (score < 5) return 'Green'; + if (score > 20) return 'Red'; + return 'Orange'; +}; + +const GetResultLine = (path, gapAnalysis, key) => { + let segmentID = gapAnalysis[key].start.id; + return ( + + {getDocumentDisplayName(path.end, true)} } + > + + {getDocumentDisplayName(gapAnalysis[key].start, true)} + {path.path.map((segment) => { + const { text, nextID } = GetSegmentText(segment, segmentID); + segmentID = nextID; + return text; + })} + + + + ({GetStrength(path.score)}:{path.score}) + + } + > + + Generally: lower is better +
+ {GetStrength(0)}: Closely connected likely to have + majority overlap +
+ {GetStrength(6)}: Connected likely to have partial + overlap +
+ {GetStrength(22)}: Weakly connected likely to have + small or no overlap +
+
+ + + +
+ + ); +}; + export const GapAnalysis = () => { const standardOptionsDefault = [{ key: '', text: '', value: undefined }]; const searchParams = useQuery(); @@ -64,18 +128,6 @@ export const GapAnalysis = () => { const [error, setError] = useState(null); const { apiUrl } = useEnvironment(); - const GetStrength = (score) => { - if (score < 5) return 'Strong'; - if (score > 20) return 'Weak'; - return 'Average'; - }; - - const GetStrengthColor = (score) => { - if (score < 5) return 'Green'; - if (score > 20) return 'Red'; - return 'Orange'; - }; - const GetStrongPathsCount = (paths) => Math.max(Object.values(paths).filter((x) => GetStrength(x.score) === 'Strong').length, 3); @@ -120,12 +172,13 @@ export const GapAnalysis = () => { }; return ( -

- - - -
+ + + + {' '} + Base:{' '} { onChange={(e, { value }) => setBaseStandard(value?.toString())} value={BaseStandard} /> - - - - + + Compare:{' '} { onChange={(e, { value }) => setCompareStandard(value?.toString())} value={CompareStandard} /> - - - - {gapAnalysis && ( - <> - - Generally: lower is better -
- {GetStrength(0)}: Closely connected likely to have - majority overlap -
- {GetStrength(6)}: Connected likely to have partial - overlap -
- {GetStrength(22)}: Weakly connected likely to - have small or no overlap -
-
- - - - - )} - - - {gapAnalysis && ( -
- - - {BaseStandard} - {CompareStandard} - - - - - {Object.keys(gapAnalysis).map((key) => ( - - -

- {getDocumentDisplayName(gapAnalysis[key].start, true)}{' '} - - - -

-
- - {Object.values(gapAnalysis[key].paths) - .sort((a, b) => a.score - b.score) - .slice(0, GetStrongPathsCount(gapAnalysis[key].paths)) - .map((path) => { - let segmentID = gapAnalysis[key].start.id; - return ( - - - {getDocumentDisplayName(path.end, true)} ( - - {GetStrength(path.score)}:{path.score} - - ){' '} - - - - - } - > - - {getDocumentDisplayName(gapAnalysis[key].start, true)} - {path.path.map((segment) => { - const { text, nextID } = GetSegmentText(segment, segmentID); - segmentID = nextID; - return text; - })} - - -
- + {gapAnalysis && ( +
+ +
+ )} + +
+ + + + {gapAnalysis && ( + <> + {Object.keys(gapAnalysis).map((key) => ( + + +

+ {getDocumentDisplayName(gapAnalysis[key].start, true)}{' '} + - - - - {Object.values(gapAnalysis[key].paths) - .sort((a, b) => a.score - b.score) - .slice( - GetStrongPathsCount(gapAnalysis[key].paths), - Object.keys(gapAnalysis[key].paths).length - ) - .map((path) => { - let segmentID = gapAnalysis[key].start.id; - return ( - - - {getDocumentDisplayName(path.end, true)} ( - - {GetStrength(path.score)}:{path.score} - - ){' '} - - - - - } - > - - {getDocumentDisplayName(gapAnalysis[key].start, true)} - {path.path.map((segment) => { - const { text, nextID } = GetSegmentText(segment, segmentID); - segmentID = nextID; - return text; - })} - - -
- - ); - })} - - - )} - {Object.keys(gapAnalysis[key].paths).length === 0 && No links Found} - - - ))} - -

- )} + + +

+ + + {Object.values(gapAnalysis[key].paths) + .sort((a, b) => a.score - b.score) + .slice(0, GetStrongPathsCount(gapAnalysis[key].paths)) + .map((path) => GetResultLine(path, gapAnalysis, key))} + {Object.keys(gapAnalysis[key].paths).length > 3 && ( + + + + + + {Object.values(gapAnalysis[key].paths) + .sort((a, b) => a.score - b.score) + .slice( + GetStrongPathsCount(gapAnalysis[key].paths), + Object.keys(gapAnalysis[key].paths).length + ) + .map((path) => GetResultLine(path, gapAnalysis, key))} + + + )} + {Object.keys(gapAnalysis[key].paths).length === 0 && No links Found} + + + ))} + + )} + +
); }; From aed54ed4d56ae78abc2c7800f559d725c326c356 Mon Sep 17 00:00:00 2001 From: john681611 Date: Tue, 19 Sep 2023 11:10:01 +0100 Subject: [PATCH 45/75] Fix: GA share link spaces --- application/frontend/src/pages/GapAnalysis/GapAnalysis.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/application/frontend/src/pages/GapAnalysis/GapAnalysis.tsx b/application/frontend/src/pages/GapAnalysis/GapAnalysis.tsx index d145fed18..5353bc846 100644 --- a/application/frontend/src/pages/GapAnalysis/GapAnalysis.tsx +++ b/application/frontend/src/pages/GapAnalysis/GapAnalysis.tsx @@ -203,7 +203,7 @@ export const GapAnalysis = () => {
)} From e533bf6ab2333c38a5b4681c41c857b869ad7742 Mon Sep 17 00:00:00 2001 From: John Harvey <10814889+john681611@users.noreply.github.com> Date: Wed, 11 Oct 2023 18:14:01 +0100 Subject: [PATCH 57/75] Neo4j ORM (#408) * Make Direct Dark Green * Update GapAnalysis.tsx Signed-off-by: John Harvey <10814889+john681611@users.noreply.github.com> * Trial implementation of Neo4J ORM * Populate Neo4j DB using model * More progress in conversion * get ORM working * Fix tests * Remove old NeoDB driver connection * Fix: id issue * Fix: Tags strings splitting * linting fixes * Remove NeomodelPath reference * Revert rebase bug --------- Signed-off-by: John Harvey <10814889+john681611@users.noreply.github.com> --- .gitignore | 1 + README.md | 4 +- application/database/db.py | 427 +++++++++++++++++++---------------- application/tests/db_test.py | 127 +++++------ requirements.txt | 1 + 5 files changed, 296 insertions(+), 264 deletions(-) diff --git a/.gitignore b/.gitignore index d6db6dd2b..a1f5606a8 100644 --- a/.gitignore +++ b/.gitignore @@ -16,6 +16,7 @@ Vagrantfile ## act secrets .secrets/ +.env ### conventions ### venv/ diff --git a/README.md b/README.md index 6e1925678..7ca1ec1c3 100644 --- a/README.md +++ b/README.md @@ -68,9 +68,7 @@ Alternatively, you can use the dockerfile with Some features like Gap Analysis require a neo4j DB running you can start this with
make docker-neo4j
enviroment varaibles for app to connect to neo4jDB (default): -- NEO4J_URI (localhost) -- NEO4J_USR (neo4j) -- NEO4J_PASS (password) +- NEO4J_BOLT_URL (bolt://neo4j:password@localhost:7687) To run the web application for production you need gunicorn and you can run from within the cre_sync dir
make prod-run
diff --git a/application/database/db.py b/application/database/db.py index 4a0fa40c9..f9ca75d8b 100644 --- a/application/database/db.py +++ b/application/database/db.py @@ -1,5 +1,14 @@ -from neo4j import GraphDatabase -import neo4j +from neomodel import ( + config, + StructuredNode, + StringProperty, + UniqueIdProperty, + Relationship, + RelationshipTo, + ArrayProperty, + StructuredRel, + db, +) from sqlalchemy.orm import aliased import os import logging @@ -14,7 +23,6 @@ from application.utils import file from flask_sqlalchemy.model import DefaultMeta from sqlalchemy import func -from sqlalchemy.sql.expression import desc # type: ignore import uuid from application.utils.gap_analysis import get_path_score @@ -160,6 +168,112 @@ class Embeddings(BaseModel): # type: ignore ) +class RelatedRel(StructuredRel): + pass + + +class ContainsRel(StructuredRel): + pass + + +class LinkedToRel(StructuredRel): + pass + + +class SameRel(StructuredRel): + pass + + +class NeoDocument(StructuredNode): + document_id = UniqueIdProperty() + name = StringProperty(required=True) + description = StringProperty(required=True) + tags = ArrayProperty(StringProperty()) + doctype = StringProperty(required=True) + related = Relationship("NeoDocument", "RELATED", model=RelatedRel) + + @classmethod + def to_cre_def(self, node): + raise Exception(f"Shouldn't be parsing a NeoDocument") + + +class NeoNode(NeoDocument): + doctype = StringProperty() + version = StringProperty(required=True) + hyperlink = StringProperty() + + @classmethod + def to_cre_def(self, node): + raise Exception(f"Shouldn't be parsing a NeoNode") + + +class NeoStandard(NeoNode): + section = StringProperty() + subsection = StringProperty(required=True) + section_id = StringProperty() + + @classmethod + def to_cre_def(self, node) -> cre_defs.Standard: + return cre_defs.Standard( + name=node.name, + id=node.document_id, + description=node.description, + tags=node.tags, + hyperlink=node.hyperlink, + version=node.version, + 
section=node.section, + sectionID=node.section_id, + subsection=node.subsection, + ) + + +class NeoTool(NeoStandard): + tooltype = StringProperty(required=True) + + @classmethod + def to_cre_def(self, node) -> cre_defs.Tool: + return cre_defs.Tool( + name=node.name, + id=node.document_id, + description=node.description, + tags=node.tags, + hyperlink=node.hyperlink, + version=node.version, + section=node.section, + sectionID=node.section_id, + subsection=node.subsection, + ) + + +class NeoCode(NeoNode): + @classmethod + def to_cre_def(self, node) -> cre_defs.Code: + return cre_defs.Code( + name=node.name, + id=node.document_id, + description=node.description, + tags=node.tags, + hyperlink=node.hyperlink, + version=node.version, + ) + + +class NeoCRE(NeoDocument): # type: ignore + external_id = StringProperty() + contains = RelationshipTo("NeoCRE", "CONTAINS", model=ContainsRel) + linked = RelationshipTo("NeoStandard", "LINKED_TO", model=LinkedToRel) + same_as = RelationshipTo("NeoStandard", "SAME", model=SameRel) + + @classmethod + def to_cre_def(self, node) -> cre_defs.CRE: + return cre_defs.CRE( + name=node.name, + id=node.document_id, + description=node.description, + tags=node.tags, + ) + + class NEO_DB: __instance = None @@ -171,21 +285,9 @@ def instance(self): if self.__instance is None: self.__instance = self.__new__(self) - URI = os.getenv("NEO4J_URI") or "neo4j://localhost:7687" - AUTH = ( - os.getenv("NEO4J_USR") or "neo4j", - os.getenv("NEO4J_PASS") or "password", + config.DATABASE_URL = ( + os.getenv("NEO4J_BOLT_URL") or "bolt://neo4j:password@localhost:7687" ) - self.driver = GraphDatabase.driver(URI, auth=AUTH) - - try: - self.driver.verify_connectivity() - self.connected = True - except neo4j.exceptions.ServiceUnavailable: - logger.error( - "NEO4J ServiceUnavailable error - disabling neo4j related features" - ) - return self.__instance def __init__(sel): @@ -221,238 +323,173 @@ def populate_DB(self, session) -> nx.Graph: @classmethod def add_cre(self, 
dbcre: CRE): - if not self.connected: - return - self.driver.execute_query( - "MERGE (n:CRE {id: $nid, name: $name, description: $description, doctype: $doctype, links: $links, metadata: $metadata, tags: $tags})", - name=dbcre.name, - doctype="CRE", # dbcre.ntype, - nid=dbcre.id, - description=dbcre.description, - links=[], # dbcre.links, - tags=dbcre.tags, - metadata="{}", # dbcre.metadata, - database_="neo4j", + NeoCRE.create_or_update( + { + "name": dbcre.name, + "doctype": "CRE", # dbcre.ntype, + "document_id": dbcre.id, + "description": dbcre.description, + "links": [], # dbcre.links, + "tags": [dbcre.tags] if isinstance(dbcre.tags, str) else dbcre.tags, + } ) @classmethod def add_dbnode(self, dbnode: Node): - if not self.connected: - return if dbnode.ntype == "Standard": - self.driver.execute_query( - "MERGE (n:Standard {id: $nid, name: $name, section: $section, sectionID: $sectionID, subsection: $subsection, tags: $tags, version: $version, description: $description, doctype: $doctype, links: $links, metadata: $metadata, hyperlink: $hyperlink})", - name=dbnode.name, - doctype=dbnode.ntype, - nid=dbnode.id, - description=dbnode.description, - links=[], # dbnode.links, - tags=dbnode.tags, - metadata="{}", # dbnode.metadata, - hyperlink="", # dbnode.hyperlink or "", - version=dbnode.version or "", - section=dbnode.section, - sectionID=dbnode.section_id, # dbnode.sectionID, - subsection=dbnode.subsection or "", - database_="neo4j", + NeoStandard.create_or_update( + { + "name": dbnode.name, + "doctype": dbnode.ntype, + "document_id": dbnode.id, + "description": dbnode.description or "", + "tags": [dbnode.tags] + if isinstance(dbnode.tags, str) + else dbnode.tags, + "hyperlink": "", # dbnode.hyperlink or "", + "version": dbnode.version or "", + "section": dbnode.section or "", + "section_id": dbnode.section_id or "", + "subsection": dbnode.subsection or "", + } ) return if dbnode.ntype == "Tool": - self.driver.execute_query( - "MERGE (n:Tool {id: $nid, name: $name, 
section: $section, sectionID: $sectionID, subsection: $subsection, tags: $tags, version: $version, description: $description, doctype: $doctype, links: $links, metadata: $metadata, hyperlink: $hyperlink, tooltype: $tooltype})", - name=dbnode.name, - doctype=dbnode.ntype, - nid=dbnode.id, - description=dbnode.description, - links=[], # dbnode.links, - tags=dbnode.tags, - metadata="{}", # dbnode.metadata, - hyperlink="", # dbnode.hyperlink or "", - version=dbnode.version or "", - section=dbnode.section, - sectionID=dbnode.section_id, # dbnode.sectionID, - subsection=dbnode.subsection or "", - tooltype="", # dbnode.tooltype, - database_="neo4j", + NeoTool.create_or_update( + { + "name": dbnode.name, + "doctype": dbnode.ntype, + "document_id": dbnode.id, + "description": dbnode.description, + "links": [], # dbnode.links, + "tags": [dbnode.tags] + if isinstance(dbnode.tags, str) + else dbnode.tags, + "metadata": "{}", # dbnode.metadata, + "hyperlink": "", # dbnode.hyperlink or "", + "version": dbnode.version or "", + "section": dbnode.section, + "section_id": dbnode.section_id, # dbnode.sectionID, + "subsection": dbnode.subsection or "", + "tooltype": "", # dbnode.tooltype, + } ) return if dbnode.ntype == "Code": - self.driver.execute_query( - "MERGE (n:Code {id: $nid, name: $name, section: $section, sectionID: $sectionID, subsection: $subsection, tags: $tags, version: $version, description: $description, doctype: $doctype, links: $links, metadata: $metadata, hyperlink: $hyperlink})", - name=dbnode.name, - doctype=dbnode.ntype, - nid=dbnode.id, - description=dbnode.description, - links=[], # dbnode.links, - tags=dbnode.tags, - metadata="{}", # dbnode.metadata, - hyperlink="", # dbnode.hyperlink or "", - version=dbnode.version or "", + NeoCode.create_or_update( + { + "name": dbnode.name, + "doctype": dbnode.ntype, + "document_id": dbnode.id, + "description": dbnode.description, + "links": [], # dbnode.links, + "tags": [dbnode.tags] + if isinstance(dbnode.tags, str) + 
else dbnode.tags, + "metadata": "{}", # dbnode.metadata, + "hyperlink": "", # dbnode.hyperlink or "", + "version": dbnode.version or "", + } ) return raise Exception(f"Unknown DB type: {dbnode.ntype}") @classmethod def link_CRE_to_CRE(self, id1, id2, link_type): - if not self.connected: + cre1 = NeoCRE.nodes.get(document_id=id1) + cre2 = NeoCRE.nodes.get(document_id=id2) + + if link_type == "Contains": + cre1.contains.connect(cre2) return - self.driver.execute_query( - "MATCH (a:CRE), (b:CRE) " - "WHERE a.id = $aID AND b.id = $bID " - "CALL apoc.create.relationship(a,$relType, {},b) " - "YIELD rel " - "RETURN rel", - aID=id1, - bID=id2, - relType=str.upper(link_type).replace(" ", "_"), - database_="neo4j", - ) + if link_type == "Related": + cre1.related.connect(cre2) + return + raise Exception(f"Unknown relation type {link_type}") @classmethod def link_CRE_to_Node(self, CRE_id, node_id, link_type): - if not self.connected: + cre = NeoCRE.nodes.get(document_id=CRE_id) + node = NeoNode.nodes.get(document_id=node_id) + if link_type == "Linked To": + cre.linked.connect(node) return - self.driver.execute_query( - "MATCH (a:CRE), (b:Standard|Tool) " - "WHERE a.id = $aID AND b.id = $bID " - "CALL apoc.create.relationship(a,$relType, {},b) " - "YIELD rel " - "RETURN rel", - aID=CRE_id, - bID=node_id, - relType=str.upper(link_type).replace(" ", "_"), - database_="neo4j", - ) + if link_type == "SAME": + cre.same_as.connect(node) + return + raise Exception(f"Unknown relation type {link_type}") @classmethod def gap_analysis(self, name_1, name_2): - if not self.connected: - return None, None - base_standard, _, _ = self.driver.execute_query( - """ - MATCH (BaseStandard:Standard|Tool {name: $name1}) - RETURN BaseStandard - """, - name1=name_1, - database_="neo4j", - ) + base_standard = NeoStandard.nodes.filter(name=name_1) - path_records_all, _, _ = self.driver.execute_query( + path_records_all, _ = db.cypher_query( """ - OPTIONAL MATCH (BaseStandard:Standard|Tool {name: 
$name1}) - OPTIONAL MATCH (CompareStandard:Standard|Tool {name: $name2}) + OPTIONAL MATCH (BaseStandard:NeoStandard {name: $name1}) + OPTIONAL MATCH (CompareStandard:NeoStandard {name: $name2}) OPTIONAL MATCH p = shortestPath((BaseStandard)-[*..20]-(CompareStandard)) WITH p - WHERE length(p) > 1 AND ALL(n in NODES(p) WHERE n:CRE or n = BaseStandard or n = CompareStandard) + WHERE length(p) > 1 AND ALL(n in NODES(p) WHERE n:NeoCRE or n = BaseStandard or n = CompareStandard) RETURN p """, - name1=name_1, - name2=name_2, - database_="neo4j", + {"name1": name_1, "name2": name_2}, + resolve_objects=True, ) - path_records, _, _ = self.driver.execute_query( + + path_records, _ = db.cypher_query( """ - OPTIONAL MATCH (BaseStandard:Standard|Tool {name: $name1}) - OPTIONAL MATCH (CompareStandard:Standard|Tool {name: $name2}) + OPTIONAL MATCH (BaseStandard:NeoStandard {name: $name1}) + OPTIONAL MATCH (CompareStandard:NeoStandard {name: $name2}) OPTIONAL MATCH p = shortestPath((BaseStandard)-[:(LINKED_TO|CONTAINS)*..20]-(CompareStandard)) WITH p - WHERE length(p) > 1 AND ALL(n in NODES(p) WHERE n:CRE or n = BaseStandard or n = CompareStandard) + WHERE length(p) > 1 AND ALL(n in NODES(p) WHERE n:NeoCRE or n = BaseStandard or n = CompareStandard) RETURN p """, - name1=name_1, - name2=name_2, - database_="neo4j", + {"name1": name_1, "name2": name_2}, + resolve_objects=True, ) - def format_segment(seg): + def format_segment(seg: StructuredRel, nodes): + relation_map = { + RelatedRel: "RELATED", + ContainsRel: "CONTAINS", + LinkedToRel: "LINKED_TO", + SameRel: "SAME", + } + start_node = [ + node for node in nodes if node.element_id == seg._start_node_element_id + ][0] + end_node = [ + node for node in nodes if node.element_id == seg._end_node_element_id + ][0] + return { - "start": NEO_DB.parse_node(seg.start_node), - "end": NEO_DB.parse_node(seg.end_node), - "relationship": seg.type, + "start": NEO_DB.parse_node(start_node), + "end": NEO_DB.parse_node(end_node), + "relationship": 
relation_map[type(seg)], } def format_path_record(rec): return { "start": NEO_DB.parse_node(rec.start_node), "end": NEO_DB.parse_node(rec.end_node), - "path": [format_segment(seg) for seg in rec.relationships], + "path": [format_segment(seg, rec.nodes) for seg in rec.relationships], } - return [NEO_DB.parse_node(rec["BaseStandard"]) for rec in base_standard], [ - format_path_record(rec["p"]) for rec in (path_records + path_records_all) + return [NEO_DB.parse_node(rec) for rec in base_standard], [ + format_path_record(rec[0]) for rec in (path_records + path_records_all) ] @classmethod def standards(self) -> List[str]: - if not self.connected: - return - records, _, _ = self.driver.execute_query( - "MATCH (n:Standard|Tool) " "RETURN collect(distinct n.name)", - database_="neo4j", - ) - return records[0][0] + tools = NeoTool.nodes.all() + standards = NeoStandard.nodes.all() - @staticmethod - def parse_node(node: neo4j.graph.Node) -> cre_defs.Document: - name = node["name"] - id = node["id"] if "id" in node else None - description = node["description"] if "description" in node else None - # links = [self.parse_link(link) for link in node["links"]] - tags = node["tags"] - # metadata = node["metadata"] - if cre_defs.Credoctypes.Code.value in node.labels: - return cre_defs.Code( - name=name, - id=id, - description=description, - # links=links, - tags=tags, - # metadata=metadata, - # hyperlink=(node["hyperlink"] if "hyperlink" in node else None), - version=(node["version"] if "version" in node else None), - ) - if cre_defs.Credoctypes.Standard.value in node.labels: - return cre_defs.Standard( - name=name, - id=id, - description=description, - # links=links, - tags=tags, - # metadata=metadata, - # hyperlink=(node["hyperlink"] if "hyperlink" in node else None), - version=(node["version"] if "version" in node else None), - section=node["section"], - sectionID=node["sectionID"], - subsection=(node["subsection"] if "subsection" in node else None), - ) - if 
cre_defs.Credoctypes.Tool.value in node.labels: - return cre_defs.Tool( - name=name, - id=id, - description=description, - # links=links, - tags=tags, - # metadata=metadata, - # hyperlink=(node["hyperlink"] if "hyperlink" in node else None), - version=(node["version"] if "version" in node else None), - section=node["section"], - sectionID=node["sectionID"], - subsection=(node["subsection"] if "subsection" in node else None), - ) - if cre_defs.Credoctypes.CRE.value in node.labels: - return cre_defs.CRE( - name=name, - id=id, - description=description, - # links=links, - tags=tags, - # metadata=metadata, - ) - raise Exception(f"Unknown node {node.labels}") + return list(set([x.name for x in tools] + [x.name for x in standards])) - # @classmethod - # def parse_link(self, link): - # return cre_defs.Link(ltype=link["ltype"], tags=link["tags"]) + @staticmethod + def parse_node(node: NeoDocument) -> cre_defs.Document: + return node.to_cre_def(node) class CRE_Graph: diff --git a/application/tests/db_test.py b/application/tests/db_test.py index d79671bc2..6fafe162d 100644 --- a/application/tests/db_test.py +++ b/application/tests/db_test.py @@ -1315,25 +1315,22 @@ def test_neo_db_parse_node_code(self): description = "description" tags = "tags" version = "version" + hyperlink = "version" expected = defs.Code( name=name, id=id, description=description, tags=tags, version=version, + hyperlink=hyperlink, ) - graph_node = neo4j.graph.Node( - None, - "123", - "id", - n_labels=[defs.Credoctypes.Code.value], - properties={ - "name": name, - "id": id, - "description": description, - "tags": tags, - "version": version, - }, + graph_node = db.NeoCode( + name=name, + document_id=id, + description=description, + tags=tags, + version=version, + hyperlink=hyperlink, ) self.assertEqual(db.NEO_DB.parse_node(graph_node), expected) @@ -1346,6 +1343,7 @@ def test_neo_db_parse_node_standard(self): section = "section" sectionID = "sectionID" subsection = "subsection" + hyperlink = "version" 
expected = defs.Standard( name=name, id=id, @@ -1355,22 +1353,18 @@ def test_neo_db_parse_node_standard(self): section=section, sectionID=sectionID, subsection=subsection, + hyperlink=hyperlink, ) - graph_node = neo4j.graph.Node( - None, - "123", - "id", - n_labels=[defs.Credoctypes.Standard.value], - properties={ - "name": name, - "id": id, - "description": description, - "tags": tags, - "version": version, - "section": section, - "sectionID": sectionID, - "subsection": subsection, - }, + graph_node = db.NeoStandard( + name=name, + document_id=id, + description=description, + tags=tags, + version=version, + section=section, + section_id=sectionID, + subsection=subsection, + hyperlink=hyperlink, ) self.assertEqual(db.NEO_DB.parse_node(graph_node), expected) @@ -1383,6 +1377,7 @@ def test_neo_db_parse_node_tool(self): section = "section" sectionID = "sectionID" subsection = "subsection" + hyperlink = "version" expected = defs.Tool( name=name, id=id, @@ -1392,22 +1387,18 @@ def test_neo_db_parse_node_tool(self): section=section, sectionID=sectionID, subsection=subsection, + hyperlink=hyperlink, ) - graph_node = neo4j.graph.Node( - None, - "123", - "id", - n_labels=[defs.Credoctypes.Tool.value], - properties={ - "name": name, - "id": id, - "description": description, - "tags": tags, - "version": version, - "section": section, - "sectionID": sectionID, - "subsection": subsection, - }, + graph_node = db.NeoTool( + name=name, + document_id=id, + description=description, + tags=tags, + version=version, + section=section, + section_id=sectionID, + subsection=subsection, + hyperlink=hyperlink, ) self.assertEqual(db.NEO_DB.parse_node(graph_node), expected) @@ -1422,41 +1413,45 @@ def test_neo_db_parse_node_cre(self): description=description, tags=tags, ) - graph_node = neo4j.graph.Node( - None, - "123", - "id", - n_labels=[defs.Credoctypes.CRE.value], - properties={ - "name": name, - "id": id, - "description": description, - "tags": tags, - }, + graph_node = db.NeoCRE( + 
name=name, + document_id=id, + description=description, + tags=tags, ) self.assertEqual(db.NEO_DB.parse_node(graph_node), expected) - def test_neo_db_parse_node_unknown(self): + def test_neo_db_parse_node_Document(self): name = "name" id = "id" description = "description" tags = "tags" - graph_node = neo4j.graph.Node( - None, - "123", - "id", - n_labels=["ABC"], - properties={ - "name": name, - "id": id, - "description": description, - "tags": tags, - }, + graph_node = db.NeoDocument( + name=name, + document_id=id, + description=description, + tags=tags, + ) + with self.assertRaises(Exception) as cm: + db.NEO_DB.parse_node(graph_node) + + self.assertEqual(str(cm.exception), "Shouldn't be parsing a NeoDocument") + + def test_neo_db_parse_node_Node(self): + name = "name" + id = "id" + description = "description" + tags = "tags" + graph_node = db.NeoNode( + name=name, + document_id=id, + description=description, + tags=tags, ) with self.assertRaises(Exception) as cm: db.NEO_DB.parse_node(graph_node) - self.assertEqual(str(cm.exception), "Unknown node frozenset({'ABC'})") + self.assertEqual(str(cm.exception), "Shouldn't be parsing a NeoNode") def test_get_embeddings_by_doc_type_paginated(self): """Given: a range of embedding for Nodes and a range of embeddings for CREs diff --git a/requirements.txt b/requirements.txt index 6659a57bf..4fcc0a936 100644 --- a/requirements.txt +++ b/requirements.txt @@ -34,6 +34,7 @@ compliance-trestle nose==1.3.7 numpy==1.23.0 neo4j +neomodel openapi-schema-validator==0.3.4 openapi-spec-validator==0.5.1 openpyxl==3.1.0 From 7bd9bca37fb331f5bfe94334906c001c2c8756a2 Mon Sep 17 00:00:00 2001 From: John Harvey <10814889+john681611@users.noreply.github.com> Date: Wed, 11 Oct 2023 18:29:41 +0100 Subject: [PATCH 58/75] Remove BOLT from neo4j Setup requirement (#428) --- README.md | 2 +- application/database/db.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 7ca1ec1c3..e82174310 100644 --- 
a/README.md +++ b/README.md @@ -68,7 +68,7 @@ Alternatively, you can use the dockerfile with Some features like Gap Analysis require a neo4j DB running you can start this with
make docker-neo4j
enviroment varaibles for app to connect to neo4jDB (default): -- NEO4J_BOLT_URL (bolt://neo4j:password@localhost:7687) +- NEO4J_URL (neo4j//neo4j:password@localhost:7687) To run the web application for production you need gunicorn and you can run from within the cre_sync dir
make prod-run
diff --git a/application/database/db.py b/application/database/db.py index f9ca75d8b..3e4c0b293 100644 --- a/application/database/db.py +++ b/application/database/db.py @@ -286,7 +286,7 @@ def instance(self): self.__instance = self.__new__(self) config.DATABASE_URL = ( - os.getenv("NEO4J_BOLT_URL") or "bolt://neo4j:password@localhost:7687" + os.getenv("NEO4J_URL") or "neo4j://neo4j:password@localhost:7687" ) return self.__instance From 3790c163742fd76dd832d9c68eda2ac89fed5dc2 Mon Sep 17 00:00:00 2001 From: Spyros Date: Wed, 11 Oct 2023 18:56:53 +0100 Subject: [PATCH 59/75] pin neomodel to a decent version --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 4fcc0a936..113dbdb93 100644 --- a/requirements.txt +++ b/requirements.txt @@ -34,7 +34,7 @@ compliance-trestle nose==1.3.7 numpy==1.23.0 neo4j -neomodel +neomodel==5.1.2 openapi-schema-validator==0.3.4 openapi-spec-validator==0.5.1 openpyxl==3.1.0 From 29804085e170e3c97fa80eb8ad56f569adf2fffd Mon Sep 17 00:00:00 2001 From: Spyros Date: Wed, 11 Oct 2023 19:00:43 +0100 Subject: [PATCH 60/75] relax six requirements --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 113dbdb93..786a3168a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -89,7 +89,7 @@ ruamel.yaml.clib==0.2.7 scikit-learn==1.2.2 Shapely==1.8.5.post1 simplify-docx==0.1.2 -six==1.15.0 +six smmap==3.0.4 sniffio==1.3.0 soupsieve==2.4.1 From df857aacbfbdc50355a0c0f298aaf6a7816ed8d8 Mon Sep 17 00:00:00 2001 From: John Harvey <10814889+john681611@users.noreply.github.com> Date: Sat, 14 Oct 2023 18:10:29 +0100 Subject: [PATCH 61/75] Sort Base standard alphabetically (#427) --- application/frontend/src/pages/GapAnalysis/GapAnalysis.tsx | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/application/frontend/src/pages/GapAnalysis/GapAnalysis.tsx 
b/application/frontend/src/pages/GapAnalysis/GapAnalysis.tsx index 22271cb14..a251cf86a 100644 --- a/application/frontend/src/pages/GapAnalysis/GapAnalysis.tsx +++ b/application/frontend/src/pages/GapAnalysis/GapAnalysis.tsx @@ -229,7 +229,10 @@ export const GapAnalysis = () => { {gapAnalysis && ( <> - {Object.keys(gapAnalysis).map((key) => ( + {Object.keys(gapAnalysis) + .sort((a, b) => + getDocumentDisplayName(gapAnalysis[a].start, true).localeCompare(getDocumentDisplayName(gapAnalysis[b].start, true)) + ).map((key) => (
From e33d13d9e1de2514f08fc4d8dadba0177fa4d665 Mon Sep 17 00:00:00 2001 From: John Harvey <10814889+john681611@users.noreply.github.com> Date: Tue, 17 Oct 2023 21:08:20 +0100 Subject: [PATCH 62/75] Use allShortestPaths over shortestPath (#431) --- application/database/db.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/application/database/db.py b/application/database/db.py index 3e4c0b293..45ea9c70c 100644 --- a/application/database/db.py +++ b/application/database/db.py @@ -427,7 +427,7 @@ def gap_analysis(self, name_1, name_2): """ OPTIONAL MATCH (BaseStandard:NeoStandard {name: $name1}) OPTIONAL MATCH (CompareStandard:NeoStandard {name: $name2}) - OPTIONAL MATCH p = shortestPath((BaseStandard)-[*..20]-(CompareStandard)) + OPTIONAL MATCH p = allShortestPaths((BaseStandard)-[*..20]-(CompareStandard)) WITH p WHERE length(p) > 1 AND ALL(n in NODES(p) WHERE n:NeoCRE or n = BaseStandard or n = CompareStandard) RETURN p @@ -440,7 +440,7 @@ def gap_analysis(self, name_1, name_2): """ OPTIONAL MATCH (BaseStandard:NeoStandard {name: $name1}) OPTIONAL MATCH (CompareStandard:NeoStandard {name: $name2}) - OPTIONAL MATCH p = shortestPath((BaseStandard)-[:(LINKED_TO|CONTAINS)*..20]-(CompareStandard)) + OPTIONAL MATCH p = allShortestPaths((BaseStandard)-[:(LINKED_TO|CONTAINS)*..20]-(CompareStandard)) WITH p WHERE length(p) > 1 AND ALL(n in NODES(p) WHERE n:NeoCRE or n = BaseStandard or n = CompareStandard) RETURN p From 2030d13b4632989f5c63485274032e9e40b78344 Mon Sep 17 00:00:00 2001 From: John Harvey <10814889+john681611@users.noreply.github.com> Date: Wed, 18 Oct 2023 16:07:11 +0100 Subject: [PATCH 63/75] Added Cross-cutting concerns to GA blacklist (#430) * Added Cross-cutting concerns to GA blacklist * GA page linting * Remove double brackets * Fix GA loading disappearing * rename variable --- application/database/db.py | 10 +- .../src/pages/GapAnalysis/GapAnalysis.tsx | 104 +++++++++--------- 2 files changed, 59 insertions(+), 55 deletions(-) diff 
--git a/application/database/db.py b/application/database/db.py index 45ea9c70c..ec4254ae8 100644 --- a/application/database/db.py +++ b/application/database/db.py @@ -422,17 +422,17 @@ def link_CRE_to_Node(self, CRE_id, node_id, link_type): @classmethod def gap_analysis(self, name_1, name_2): base_standard = NeoStandard.nodes.filter(name=name_1) - + denylist = ["Cross-cutting concerns"] path_records_all, _ = db.cypher_query( """ OPTIONAL MATCH (BaseStandard:NeoStandard {name: $name1}) OPTIONAL MATCH (CompareStandard:NeoStandard {name: $name2}) OPTIONAL MATCH p = allShortestPaths((BaseStandard)-[*..20]-(CompareStandard)) WITH p - WHERE length(p) > 1 AND ALL(n in NODES(p) WHERE n:NeoCRE or n = BaseStandard or n = CompareStandard) + WHERE length(p) > 1 AND ALL(n in NODES(p) WHERE (n:NeoCRE or n = BaseStandard or n = CompareStandard) AND NOT n.name in $denylist) RETURN p """, - {"name1": name_1, "name2": name_2}, + {"name1": name_1, "name2": name_2, "denylist": denylist}, resolve_objects=True, ) @@ -442,10 +442,10 @@ def gap_analysis(self, name_1, name_2): OPTIONAL MATCH (CompareStandard:NeoStandard {name: $name2}) OPTIONAL MATCH p = allShortestPaths((BaseStandard)-[:(LINKED_TO|CONTAINS)*..20]-(CompareStandard)) WITH p - WHERE length(p) > 1 AND ALL(n in NODES(p) WHERE n:NeoCRE or n = BaseStandard or n = CompareStandard) + WHERE length(p) > 1 AND ALL(n in NODES(p) WHERE (n:NeoCRE or n = BaseStandard or n = CompareStandard) AND NOT n.name in $denylist) RETURN p """, - {"name1": name_1, "name2": name_2}, + {"name1": name_1, "name2": name_2, "denylist": denylist}, resolve_objects=True, ) diff --git a/application/frontend/src/pages/GapAnalysis/GapAnalysis.tsx b/application/frontend/src/pages/GapAnalysis/GapAnalysis.tsx index a251cf86a..e69d24726 100644 --- a/application/frontend/src/pages/GapAnalysis/GapAnalysis.tsx +++ b/application/frontend/src/pages/GapAnalysis/GapAnalysis.tsx @@ -127,7 +127,8 @@ export const GapAnalysis = () => { ); const [gapAnalysis, setGapAnalysis] 
= useState>(); const [activeIndex, SetActiveIndex] = useState(); - const [loading, setLoading] = useState(false); + const [loadingStandards, setLoadingStandards] = useState(false); + const [loadingGA, setLoadingGA] = useState(false); const [error, setError] = useState(null); const { apiUrl } = useEnvironment(); @@ -142,36 +143,36 @@ export const GapAnalysis = () => { useEffect(() => { const fetchData = async () => { const result = await axios.get(`${apiUrl}/standards`); - setLoading(false); + setLoadingStandards(false); setStandardOptions( standardOptionsDefault.concat(result.data.sort().map((x) => ({ key: x, text: x, value: x }))) ); }; - setLoading(true); + setLoadingStandards(true); fetchData().catch((e) => { - setLoading(false); + setLoadingStandards(false); setError(e.response.data.message ?? e.message); }); - }, [setStandardOptions, setLoading, setError]); + }, [setStandardOptions, setLoadingStandards, setError]); useEffect(() => { const fetchData = async () => { const result = await axios.get( `${apiUrl}/map_analysis?standard=${BaseStandard}&standard=${CompareStandard}` ); - setLoading(false); + setLoadingGA(false); setGapAnalysis(result.data); }; if (!BaseStandard || !CompareStandard || BaseStandard === CompareStandard) return; setGapAnalysis(undefined); - setLoading(true); + setLoadingGA(true); fetchData().catch((e) => { - setLoading(false); + setLoadingGA(false); setError(e.response.data.message ?? e.message); }); - }, [BaseStandard, CompareStandard, setGapAnalysis, setLoading, setError]); + }, [BaseStandard, CompareStandard, setGapAnalysis, setLoadingGA, setError]); const handleAccordionClick = (e, titleProps) => { const { index } = titleProps; @@ -226,50 +227,53 @@ export const GapAnalysis = () => { - + {gapAnalysis && ( <> {Object.keys(gapAnalysis) - .sort((a, b) => - getDocumentDisplayName(gapAnalysis[a].start, true).localeCompare(getDocumentDisplayName(gapAnalysis[b].start, true)) - ).map((key) => ( - - - -

- {getDocumentDisplayName(gapAnalysis[key].start, true)} -

-
- - - {Object.values(gapAnalysis[key].paths) - .sort((a, b) => a.score - b.score) - .slice(0, GetStrongPathsCount(gapAnalysis[key].paths)) - .map((path) => GetResultLine(path, gapAnalysis, key))} - {Object.keys(gapAnalysis[key].paths).length > 3 && ( - - - - - - {Object.values(gapAnalysis[key].paths) - .sort((a, b) => a.score - b.score) - .slice( - GetStrongPathsCount(gapAnalysis[key].paths), - Object.keys(gapAnalysis[key].paths).length - ) - .map((path) => GetResultLine(path, gapAnalysis, key))} - - - )} - {Object.keys(gapAnalysis[key].paths).length === 0 && No links Found} - - - ))} + .sort((a, b) => + getDocumentDisplayName(gapAnalysis[a].start, true).localeCompare( + getDocumentDisplayName(gapAnalysis[b].start, true) + ) + ) + .map((key) => ( + + + +

+ {getDocumentDisplayName(gapAnalysis[key].start, true)} +

+
+
+ + {Object.values(gapAnalysis[key].paths) + .sort((a, b) => a.score - b.score) + .slice(0, GetStrongPathsCount(gapAnalysis[key].paths)) + .map((path) => GetResultLine(path, gapAnalysis, key))} + {Object.keys(gapAnalysis[key].paths).length > 3 && ( + + + + + + {Object.values(gapAnalysis[key].paths) + .sort((a, b) => a.score - b.score) + .slice( + GetStrongPathsCount(gapAnalysis[key].paths), + Object.keys(gapAnalysis[key].paths).length + ) + .map((path) => GetResultLine(path, gapAnalysis, key))} + + + )} + {Object.keys(gapAnalysis[key].paths).length === 0 && No links Found} + +
+ ))} )} From 167828c40955cb0f470c137256cbfb2be2a3d307 Mon Sep 17 00:00:00 2001 From: Spyros Date: Thu, 19 Oct 2023 11:12:28 +0100 Subject: [PATCH 64/75] Revert "Use allShortestPaths over shortestPath (#431)" This reverts commit e33d13d9e1de2514f08fc4d8dadba0177fa4d665. --- application/database/db.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/application/database/db.py b/application/database/db.py index ec4254ae8..503a8d491 100644 --- a/application/database/db.py +++ b/application/database/db.py @@ -427,7 +427,7 @@ def gap_analysis(self, name_1, name_2): """ OPTIONAL MATCH (BaseStandard:NeoStandard {name: $name1}) OPTIONAL MATCH (CompareStandard:NeoStandard {name: $name2}) - OPTIONAL MATCH p = allShortestPaths((BaseStandard)-[*..20]-(CompareStandard)) + OPTIONAL MATCH p = shortestPath((BaseStandard)-[*..20]-(CompareStandard)) WITH p WHERE length(p) > 1 AND ALL(n in NODES(p) WHERE (n:NeoCRE or n = BaseStandard or n = CompareStandard) AND NOT n.name in $denylist) RETURN p @@ -440,7 +440,7 @@ def gap_analysis(self, name_1, name_2): """ OPTIONAL MATCH (BaseStandard:NeoStandard {name: $name1}) OPTIONAL MATCH (CompareStandard:NeoStandard {name: $name2}) - OPTIONAL MATCH p = allShortestPaths((BaseStandard)-[:(LINKED_TO|CONTAINS)*..20]-(CompareStandard)) + OPTIONAL MATCH p = shortestPath((BaseStandard)-[:(LINKED_TO|CONTAINS)*..20]-(CompareStandard)) WITH p WHERE length(p) > 1 AND ALL(n in NODES(p) WHERE (n:NeoCRE or n = BaseStandard or n = CompareStandard) AND NOT n.name in $denylist) RETURN p From f508bcaabd84df34392758349558f9a17a7b237b Mon Sep 17 00:00:00 2001 From: Spyros Date: Mon, 23 Oct 2023 11:57:12 +0100 Subject: [PATCH 65/75] Precompute map analysis (#433) * add time metrics * progress * Revert "Revert "Use allShortestPaths over shortestPath (#431)"" This reverts commit 167828c40955cb0f470c137256cbfb2be2a3d307. 
* map analysis through redis * new reqs * add worker actually * abort if background job fails * lint * minor cleanup * move hash to utils and make db method write to redis optionally * make ga job results shared among clients * lint --- Makefile | 32 ++++-- Procfile | 3 +- README.md | 10 +- application/cmd/cre_main.py | 13 +-- application/database/db.py | 101 ++++++++++------ .../src/pages/GapAnalysis/GapAnalysis.tsx | 57 ++++++++- application/tests/cheatsheets_parser_test.py | 2 +- application/tests/db_test.py | 73 +++++++----- .../utils/external_project_parsers/ccmv3.py | 69 ----------- .../cheatsheets_parser.py | 2 +- application/utils/hash.py | 5 + application/web/web_main.py | 108 ++++++++++++++++-- application/worker.py | 22 ++++ cre.py | 6 +- requirements.txt | 3 + 15 files changed, 338 insertions(+), 168 deletions(-) delete mode 100644 application/utils/external_project_parsers/ccmv3.py create mode 100644 application/utils/hash.py create mode 100644 application/worker.py diff --git a/Makefile b/Makefile index 4d4ac9112..f78d2dce6 100644 --- a/Makefile +++ b/Makefile @@ -1,23 +1,38 @@ .ONESHELL: -.PHONY: dev-run run test covers install-deps dev docker lint frontend clean all +.PHONY: run test covers install-deps dev docker lint frontend clean all prod-run: cp cres/db.sqlite standards_cache.sqlite; gunicorn cre:app --log-file=- -dev-run: - . ./venv/bin/activate && FLASK_APP=cre.py FLASK_CONFIG=development flask run +docker-neo4j: + docker start cre-neo4j 2>/dev/null || docker run -d --name cre-neo4j --env NEO4J_PLUGINS='["apoc"]' --env NEO4J_AUTH=neo4j/password --volume=`pwd`/.neo4j/data:/data --volume=`pwd`/.neo4j/logs:/logs --workdir=/var/lib/neo4j -p 7474:7474 -p 7687:7687 neo4j + +docker-redis: + docker start redis-stack 2>/dev/null || docker run -d --name redis-stack -p 6379:6379 -p 8001:8001 redis/redis-stack:latest + +start-containers: docker-neo4j docker-redis + +start-worker: + . 
./venv/bin/activate + FLASK_APP=`pwd`/cre.py python cre.py --start_worker + +dev-flask: + . ./venv/bin/activate + FLASK_APP=`pwd`/cre.py FLASK_CONFIG=development flask run + e2e: yarn build [ -d "./venv" ] && . ./venv/bin/activate export FLASK_APP=$(CURDIR)/cre.py export FLASK_CONFIG=development - FLASK_CONFIG=development flask run& - + flask run& + yarn test:e2e killall yarn killall flask + test: [ -d "./venv" ] && . ./venv/bin/activate export FLASK_APP=$(CURDIR)/cre.py @@ -45,9 +60,6 @@ docker: docker-run: docker run -it -p 5000:5000 opencre:$(shell git rev-parse HEAD) -docker-neo4j: - docker run --env NEO4J_PLUGINS='["apoc"]' --volume=./neo4j/data:/data --volume=/data --volume=/logs --workdir=/var/lib/neo4j -p 7474:7474 -p 7687:7687 -d neo4j - lint: [ -d "./venv" ] && . ./venv/bin/activate && black . && yarn lint @@ -82,4 +94,8 @@ import-all: [ -d "./venv" ] && . ./venv/bin/activate rm -rf standards_cache.sqlite && make migrate-upgrade && export FLASK_APP=$(CURDIR)/cre.py && python cre.py --add --from_spreadsheet https://docs.google.com/spreadsheets/d/1eZOEYgts7d_-Dr-1oAbogPfzBLh6511b58pX3b59kvg && python cre.py --generate_embeddings && python cre.py --zap_in --cheatsheets_in --github_tools_in --capec_in --owasp_secure_headers_in --pci_dss_4_in --juiceshop_in && python cre.py --generate_embeddings +import-neo4j: + [ -d "./venv" ] && . ./venv/bin/activate + export FLASK_APP=$(CURDIR)/cre.py && python cre.py --populate_neo4j_db + all: clean lint test dev dev-run diff --git a/Procfile b/Procfile index 52d23bfd2..8537c1d95 100644 --- a/Procfile +++ b/Procfile @@ -1 +1,2 @@ -web: gunicorn cre:app --log-file=- \ No newline at end of file +web: gunicorn cre:app --log-file=-g +worker: FLASK_APP=`pwd`/cre.py python cre.py --start_worker \ No newline at end of file diff --git a/README.md b/README.md index e82174310..2b7dfd04a 100644 --- a/README.md +++ b/README.md @@ -60,12 +60,18 @@ To add a remote spreadsheet to your local database you can run
python cre.py --add --from_spreadsheet < google sheets url>
To run the web application for development you can run -
make dev-run
+
+$ make start-containers
+$ make start-worker 
+
+# in a seperate shell
+$ make dev-flask
+
Alternatively, you can use the dockerfile with
make docker && make docker-run
-Some features like Gap Analysis require a neo4j DB running you can start this with +Some features like Gap Analysis require a neo4j DB running, you can start this with
make docker-neo4j
enviroment varaibles for app to connect to neo4jDB (default): - NEO4J_URL (neo4j//neo4j:password@localhost:7687) diff --git a/application/cmd/cre_main.py b/application/cmd/cre_main.py index 0c02f6bb6..d257389ec 100644 --- a/application/cmd/cre_main.py +++ b/application/cmd/cre_main.py @@ -17,7 +17,6 @@ from application.utils.external_project_parsers import ( capec_parser, cwe, - ccmv3, ccmv4, cheatsheets_parser, misc_tools_parser, @@ -375,14 +374,6 @@ def run(args: argparse.Namespace) -> None: # pragma: no cover if args.export: cache = db_connect(args.cache_file) cache.export(args.export) - if args.csa_ccm_v3_in: - ccmv3.parse_ccm( - ccmFile=sheet_utils.readSpreadsheet( - alias="", - url="https://docs.google.com/spreadsheets/d/1b5i8OV919aiqW2KcYWOQvkLorL1bRPqjthJxLH0QpD8", - ), - cache=db_connect(args.cache_file), - ) if args.csa_ccm_v4_in: ccmv4.parse_ccm( ccmFile=sheet_utils.readSpreadsheet( @@ -428,6 +419,10 @@ def run(args: argparse.Namespace) -> None: # pragma: no cover owasp_metadata_to_cre(args.owasp_proj_meta) if args.populate_neo4j_db: populate_neo4j_db(args.cache_file) + if args.start_worker: + from application.worker import start_worker + + start_worker(args.cache_file) def db_connect(path: str): diff --git a/application/database/db.py b/application/database/db.py index 503a8d491..0a4a54653 100644 --- a/application/database/db.py +++ b/application/database/db.py @@ -1,3 +1,6 @@ +from flask import json as flask_json +import json +import redis from neomodel import ( config, StructuredNode, @@ -16,7 +19,6 @@ from collections import Counter from itertools import permutations from typing import Any, Dict, List, Optional, Tuple, cast - import networkx as nx import yaml from application.defs import cre_defs @@ -26,6 +28,8 @@ import uuid from application.utils.gap_analysis import get_path_score +from application.utils.hash import make_array_hash + from .. 
import sqla # type: ignore @@ -294,8 +298,7 @@ def __init__(sel): raise ValueError("NEO_DB is a singleton, please call instance() instead") @classmethod - def populate_DB(self, session) -> nx.Graph: - graph = nx.DiGraph() + def populate_DB(self, session): for il in session.query(InternalLinks).all(): group = session.query(CRE).filter(CRE.id == il.group).first() if not group: @@ -319,7 +322,6 @@ def populate_DB(self, session) -> nx.Graph: self.add_cre(cre) self.link_CRE_to_Node(lnk.cre, lnk.node, lnk.type) - return graph @classmethod def add_cre(self, dbcre: CRE): @@ -423,11 +425,15 @@ def link_CRE_to_Node(self, CRE_id, node_id, link_type): def gap_analysis(self, name_1, name_2): base_standard = NeoStandard.nodes.filter(name=name_1) denylist = ["Cross-cutting concerns"] + from pprint import pprint + from datetime import datetime + + t1 = datetime.now() path_records_all, _ = db.cypher_query( """ OPTIONAL MATCH (BaseStandard:NeoStandard {name: $name1}) OPTIONAL MATCH (CompareStandard:NeoStandard {name: $name2}) - OPTIONAL MATCH p = shortestPath((BaseStandard)-[*..20]-(CompareStandard)) + OPTIONAL MATCH p = allShortestPaths((BaseStandard)-[*..20]-(CompareStandard)) WITH p WHERE length(p) > 1 AND ALL(n in NODES(p) WHERE (n:NeoCRE or n = BaseStandard or n = CompareStandard) AND NOT n.name in $denylist) RETURN p @@ -435,12 +441,14 @@ def gap_analysis(self, name_1, name_2): {"name1": name_1, "name2": name_2, "denylist": denylist}, resolve_objects=True, ) - + t2 = datetime.now() + pprint(f"path records all took {t2-t1}") + pprint(path_records_all.__len__()) path_records, _ = db.cypher_query( """ OPTIONAL MATCH (BaseStandard:NeoStandard {name: $name1}) OPTIONAL MATCH (CompareStandard:NeoStandard {name: $name2}) - OPTIONAL MATCH p = shortestPath((BaseStandard)-[:(LINKED_TO|CONTAINS)*..20]-(CompareStandard)) + OPTIONAL MATCH p = allShortestPaths((BaseStandard)-[:(LINKED_TO|CONTAINS)*..20]-(CompareStandard)) WITH p WHERE length(p) > 1 AND ALL(n in NODES(p) WHERE (n:NeoCRE or n 
= BaseStandard or n = CompareStandard) AND NOT n.name in $denylist) RETURN p @@ -448,6 +456,7 @@ def gap_analysis(self, name_1, name_2): {"name1": name_1, "name2": name_2, "denylist": denylist}, resolve_objects=True, ) + t3 = datetime.now() def format_segment(seg: StructuredRel, nodes): relation_map = { @@ -476,16 +485,24 @@ def format_path_record(rec): "path": [format_segment(seg, rec.nodes) for seg in rec.relationships], } + pprint( + f"path records all took {t2-t1} path records took {t3 - t2}, total: {t3 - t1}" + ) return [NEO_DB.parse_node(rec) for rec in base_standard], [ format_path_record(rec[0]) for rec in (path_records + path_records_all) ] @classmethod def standards(self) -> List[str]: - tools = NeoTool.nodes.all() - standards = NeoStandard.nodes.all() - - return list(set([x.name for x in tools] + [x.name for x in standards])) + tools = [] + for x in db.cypher_query("""MATCH (n:NeoTool) RETURN DISTINCT n.name""")[0]: + tools.extend(x) + standards = [] + for x in db.cypher_query("""MATCH (n:NeoStandard) RETURN DISTINCT n.name""")[ + 0 + ]: # 0 is the results, 1 is the "n.name" param + standards.extend(x) + return list(set([x for x in tools] + [x for x in standards])) @staticmethod def parse_node(node: NeoDocument) -> cre_defs.Document: @@ -1399,28 +1416,6 @@ def find_path_between_nodes( return res - def gap_analysis(self, node_names: List[str]): - base_standard, paths = self.neo_db.gap_analysis(node_names[0], node_names[1]) - if base_standard is None: - return None - grouped_paths = {} - for node in base_standard: - key = node.id - if key not in grouped_paths: - grouped_paths[key] = {"start": node, "paths": {}} - - for path in paths: - key = path["start"].id - end_key = path["end"].id - path["score"] = get_path_score(path) - del path["start"] - if end_key in grouped_paths[key]["paths"]: - if grouped_paths[key]["paths"][end_key]["score"] > path["score"]: - grouped_paths[key]["paths"][end_key] = path - else: - grouped_paths[key]["paths"][end_key] = path - 
return grouped_paths - def standards(self) -> List[str]: return self.neo_db.standards() @@ -1767,3 +1762,43 @@ def dbCREfromCRE(cre: cre_defs.CRE) -> CRE: external_id=cre.id, tags=",".join(tags), ) + + +def gap_analysis( + neo_db: NEO_DB, + node_names: List[str], + store_in_cache: bool = False, + cache_key: str = "", +): + base_standard, paths = neo_db.gap_analysis(node_names[0], node_names[1]) + if base_standard is None: + return None + grouped_paths = {} + for node in base_standard: + key = node.id + if key not in grouped_paths: + grouped_paths[key] = {"start": node, "paths": {}} + + for path in paths: + key = path["start"].id + end_key = path["end"].id + path["score"] = get_path_score(path) + del path["start"] + if end_key in grouped_paths[key]["paths"]: + if grouped_paths[key]["paths"][end_key]["score"] > path["score"]: + grouped_paths[key]["paths"][end_key] = path + else: + grouped_paths[key]["paths"][end_key] = path + + if ( + store_in_cache + ): # lightweight memory option to not return potentially huge object and instead store in a cache, + # in case this is called via worker, we save both this and the caller memory by avoiding duplicate object in mem + conn = redis.from_url(os.getenv("REDIS_URL", "redis://localhost:6379")) + if cache_key == "": + cache_key = make_array_hash(node_names) + + conn.set(cache_key, flask_json.dumps({"result": grouped_paths})) + return (node_names, {}) + + return (node_names, grouped_paths) diff --git a/application/frontend/src/pages/GapAnalysis/GapAnalysis.tsx b/application/frontend/src/pages/GapAnalysis/GapAnalysis.tsx index e69d24726..c5b517d4c 100644 --- a/application/frontend/src/pages/GapAnalysis/GapAnalysis.tsx +++ b/application/frontend/src/pages/GapAnalysis/GapAnalysis.tsx @@ -1,5 +1,5 @@ import axios from 'axios'; -import React, { useEffect, useState } from 'react'; +import React, { useEffect, useRef, useState } from 'react'; import { useLocation } from 'react-router-dom'; import { Accordion, @@ -125,12 +125,14 @@ 
export const GapAnalysis = () => { const [CompareStandard, setCompareStandard] = useState( searchParams.get('compare') ?? '' ); + const [gaJob, setgaJob] = useState(''); const [gapAnalysis, setGapAnalysis] = useState>(); const [activeIndex, SetActiveIndex] = useState(); const [loadingStandards, setLoadingStandards] = useState(false); const [loadingGA, setLoadingGA] = useState(false); const [error, setError] = useState(null); const { apiUrl } = useEnvironment(); + const timerIdRef = useRef(); const GetStrongPathsCount = (paths) => Math.max( @@ -156,13 +158,62 @@ export const GapAnalysis = () => { }); }, [setStandardOptions, setLoadingStandards, setError]); + useEffect(() => { + console.log('gajob changed, polling'); + const pollingCallback = () => { + const fetchData = async () => { + const result = await axios.get(`${apiUrl}/ma_job_results?id=` + gaJob, { + headers: { + 'Cache-Control': 'no-cache', + Pragma: 'no-cache', + Expires: '0', + }, + }); + if (result.data.result) { + setLoadingGA(false); + setGapAnalysis(result.data.result); + setgaJob(''); + } + }; + if (!gaJob) return; + fetchData().catch((e) => { + setLoadingGA(false); + setError(e.response.data.message ?? 
e.message); + }); + }; + + const startPolling = () => { + // Polling every 10 seconds + timerIdRef.current = setInterval(pollingCallback, 10000); + }; + const stopPolling = () => { + clearInterval(timerIdRef.current); + }; + + if (gaJob) { + console.log('started polling'); + startPolling(); + } else { + console.log('stoped polling'); + stopPolling(); + } + + return () => { + stopPolling(); + }; + }, [gaJob]); + useEffect(() => { const fetchData = async () => { const result = await axios.get( `${apiUrl}/map_analysis?standard=${BaseStandard}&standard=${CompareStandard}` ); - setLoadingGA(false); - setGapAnalysis(result.data); + if (result.data.result) { + setLoadingGA(false); + setGapAnalysis(result.data.result); + } else if (result.data.job_id) { + setgaJob(result.data.job_id); + } }; if (!BaseStandard || !CompareStandard || BaseStandard === CompareStandard) return; diff --git a/application/tests/cheatsheets_parser_test.py b/application/tests/cheatsheets_parser_test.py index 0c5a9bcb1..a543a00c0 100644 --- a/application/tests/cheatsheets_parser_test.py +++ b/application/tests/cheatsheets_parser_test.py @@ -40,7 +40,7 @@ class Repo: repo_path="https://github.com/foo/bar.git", ) expected = defs.Standard( - name="Cheat_sheets", + name="OWASP Cheat Sheets", hyperlink="https://github.com/foo/bar/tree/master/cs.md", section="Secrets Management Cheat Sheet", ) diff --git a/application/tests/db_test.py b/application/tests/db_test.py index 6fafe162d..6ab6402c9 100644 --- a/application/tests/db_test.py +++ b/application/tests/db_test.py @@ -1146,7 +1146,7 @@ def test_gap_analysis_disconnected(self, gap_mock): collection.neo_db.connected = False gap_mock.return_value = (None, None) - self.assertEqual(collection.gap_analysis(["a", "b"]), None) + self.assertEqual(db.gap_analysis(collection.neo_db, ["a", "b"]), None) @patch.object(db.NEO_DB, "gap_analysis") def test_gap_analysis_no_nodes(self, gap_mock): @@ -1154,7 +1154,9 @@ def test_gap_analysis_no_nodes(self, gap_mock): 
collection.neo_db.connected = True gap_mock.return_value = ([], []) - self.assertEqual(collection.gap_analysis(["a", "b"]), {}) + self.assertEqual( + db.gap_analysis(collection.neo_db, ["a", "b"]), (["a", "b"], {}) + ) @patch.object(db.NEO_DB, "gap_analysis") def test_gap_analysis_no_links(self, gap_mock): @@ -1163,8 +1165,8 @@ def test_gap_analysis_no_links(self, gap_mock): gap_mock.return_value = ([defs.CRE(name="bob", id=1)], []) self.assertEqual( - collection.gap_analysis(["a", "b"]), - {1: {"start": defs.CRE(name="bob", id=1), "paths": {}}}, + db.gap_analysis(collection.neo_db, ["a", "b"]), + (["a", "b"], {1: {"start": defs.CRE(name="bob", id=1), "paths": {}}}), ) @patch.object(db.NEO_DB, "gap_analysis") @@ -1193,15 +1195,18 @@ def test_gap_analysis_one_link(self, gap_mock): } ], ) - expected = { - 1: { - "start": defs.CRE(name="bob", id=1), - "paths": { - 2: {"end": defs.CRE(name="bob", id=2), "path": path, "score": 0} - }, - } - } - self.assertEqual(collection.gap_analysis(["a", "b"]), expected) + expected = ( + ["a", "b"], + { + 1: { + "start": defs.CRE(name="bob", id=1), + "paths": { + 2: {"end": defs.CRE(name="bob", id=2), "path": path, "score": 0} + }, + } + }, + ) + self.assertEqual(db.gap_analysis(collection.neo_db, ["a", "b"]), expected) @patch.object(db.NEO_DB, "gap_analysis") def test_gap_analysis_duplicate_link_path_existing_lower(self, gap_mock): @@ -1246,15 +1251,18 @@ def test_gap_analysis_duplicate_link_path_existing_lower(self, gap_mock): }, ], ) - expected = { - 1: { - "start": defs.CRE(name="bob", id=1), - "paths": { - 2: {"end": defs.CRE(name="bob", id=2), "path": path, "score": 0} - }, - } - } - self.assertEqual(collection.gap_analysis(["a", "b"]), expected) + expected = ( + ["a", "b"], + { + 1: { + "start": defs.CRE(name="bob", id=1), + "paths": { + 2: {"end": defs.CRE(name="bob", id=2), "path": path, "score": 0} + }, + } + }, + ) + self.assertEqual(db.gap_analysis(collection.neo_db, ["a", "b"]), expected) @patch.object(db.NEO_DB, 
"gap_analysis") def test_gap_analysis_duplicate_link_path_existing_higher(self, gap_mock): @@ -1299,15 +1307,18 @@ def test_gap_analysis_duplicate_link_path_existing_higher(self, gap_mock): }, ], ) - expected = { - 1: { - "start": defs.CRE(name="bob", id=1), - "paths": { - 2: {"end": defs.CRE(name="bob", id=2), "path": path, "score": 0} - }, - } - } - self.assertEqual(collection.gap_analysis(["a", "b"]), expected) + expected = ( + ["a", "b"], + { + 1: { + "start": defs.CRE(name="bob", id=1), + "paths": { + 2: {"end": defs.CRE(name="bob", id=2), "path": path, "score": 0} + }, + } + }, + ) + self.assertEqual(db.gap_analysis(collection.neo_db, ["a", "b"]), expected) def test_neo_db_parse_node_code(self): name = "name" diff --git a/application/utils/external_project_parsers/ccmv3.py b/application/utils/external_project_parsers/ccmv3.py deleted file mode 100644 index c4838f9ed..000000000 --- a/application/utils/external_project_parsers/ccmv3.py +++ /dev/null @@ -1,69 +0,0 @@ -import logging -import os -from pprint import pprint -from typing import Dict, Any -from application.database import db -from application.defs import cre_defs as defs - -from application.database.db import dbCREfromCRE -import re - -logging.basicConfig() -logger = logging.getLogger(__name__) -logger.setLevel(logging.INFO) - - -def make_nist_map(cache: db.Node_collection): - nist_map = {} - re_id = re.compile("(?P\w+-\d+)") - - nist = cache.get_nodes(name="NIST 800-53 v5") - if not nist: - logger.fatal("This CRE DB does not contain NIST, this is fatal") - return - - for nst in nist: - ri = re_id.search(nst.section) - if ri: - nist_map[ri.group("id")] = nst - return nist_map - - -def parse_ccm(ccmFile: Dict[str, Any], cache: db.Node_collection): - nist_map = make_nist_map(cache) - - for ccm_mapping in ccmFile.get("0. 
ccmv3"): - # cre: defs.CRE - # linked_standard: defs.Standard - if "CCM V3.0 Control ID" not in ccm_mapping: - logger.error("string 'CCM V3.0 Control ID' was not found in mapping line") - continue - - ccm = defs.Standard( - name="Cloud Controls Matrix v3.0", - section=ccm_mapping.pop("CCM V3.0 Control ID"), - subsection="", - version="v3", - hyperlink="", - ) - dbccm = cache.add_node(ccm) - logger.debug(f"Registered CCM with id {ccm.section}") - - if ccm_mapping.get("NIST SP800-53 R3"): - nist_links = ccm_mapping.pop("NIST SP800-53 R3").split("\n") - - for nl in nist_links: - if nl.strip() not in nist_map.keys(): - logger.error(f"could not find NIST '{nl}' in the database") - continue - relevant_cres = [ - el.document - for el in nist_map.get(nl.strip()).links - if el.document.doctype == defs.Credoctypes.CRE - ] - - for c in relevant_cres: - cache.add_link(cre=dbCREfromCRE(cre=c), node=dbccm) - logger.debug( - f"Added link between CRE {c.id} and CCM v3.0 {dbccm.section}" - ) diff --git a/application/utils/external_project_parsers/cheatsheets_parser.py b/application/utils/external_project_parsers/cheatsheets_parser.py index 383b39f4b..52678be36 100644 --- a/application/utils/external_project_parsers/cheatsheets_parser.py +++ b/application/utils/external_project_parsers/cheatsheets_parser.py @@ -9,7 +9,7 @@ def cheatsheet(section: str, hyperlink: str, tags: List[str]) -> defs.Standard: return defs.Standard( - name=f"Cheat_sheets", + name=f"OWASP Cheat Sheets", section=section, tags=tags, hyperlink=hyperlink, diff --git a/application/utils/hash.py b/application/utils/hash.py new file mode 100644 index 000000000..f305dd829 --- /dev/null +++ b/application/utils/hash.py @@ -0,0 +1,5 @@ +import hashlib + + +def make_array_hash(array: list): + return hashlib.md5(":".join(array).encode("utf-8")).hexdigest() diff --git a/application/web/web_main.py b/application/web/web_main.py index 1fc9ed67b..13d71dbc7 100644 --- a/application/web/web_main.py +++ 
b/application/web/web_main.py @@ -8,6 +8,8 @@ import urllib.parse from typing import Any from application.utils import oscal_utils +import redis +from rq import Worker, Queue, Connection, job, exceptions from application import cache from application.database import db @@ -17,6 +19,7 @@ from application.utils import mdutils, redirectors from application.prompt_client import prompt_client as prompt_client from enum import Enum +from flask import json as flask_json from flask import ( Blueprint, abort, @@ -33,6 +36,7 @@ from application.utils.spreadsheet import write_csv import oauthlib import google.auth.transport.requests +from application.utils.hash import make_array_hash ITEMS_PER_PAGE = 20 @@ -220,20 +224,106 @@ def find_document_by_tag() -> Any: def gap_analysis() -> Any: database = db.Node_collection() standards = request.args.getlist("standard") - gap_analysis = database.gap_analysis(standards) - if gap_analysis is None: - return neo4j_not_running_rejection() - return jsonify(gap_analysis) + redis_url = os.getenv("REDIS_URL", "redis://localhost:6379") + conn = redis.from_url(redis_url) + standards_hash = make_array_hash(standards) + if conn.exists(standards_hash): + gap_analysis_results = conn.get(standards_hash) + if gap_analysis_results: + gap_analysis_dict = json.loads(gap_analysis_results) + if gap_analysis_dict.get("result"): + return jsonify({"result": gap_analysis_dict.get("result")}) + elif gap_analysis_dict.get("job_id"): + return jsonify({"job_id": gap_analysis_dict.get("job_id")}) + + q = Queue(connection=conn) + gap_analysis_job = q.enqueue_call( + db.gap_analysis, + kwargs={ + "neo_db": database.neo_db, + "node_names": standards, + "store_in_cache": True, + "cache_key": standards_hash, + }, + ) + + conn.set(standards_hash, json.dumps({"job_id": gap_analysis_job.id, "result": ""})) + return jsonify({"job_id": gap_analysis_job.id}) + + +@app.route("/rest/v1/ma_job_results", methods=["GET"]) +def fetch_job() -> Any: + logger.info("fetching job 
results") + jobid = request.args.get("id") + redis_url = os.getenv("REDIS_URL", "redis://localhost:6379") + conn = redis.from_url(redis_url) + try: + res = job.Job.fetch(id=jobid, connection=conn) + except exceptions.NoSuchJobError as nje: + abort(404, "No such job") + + logger.info("job exists") + if res.get_status() == job.JobStatus.FAILED: + abort(500, "background job failed") + elif res.get_status() == job.JobStatus.STOPPED: + abort(500, "background job stopped") + elif res.get_status() == job.JobStatus.CANCELED: + abort(500, "background job canceled") + elif ( + res.get_status() == job.JobStatus.STARTED + or res.get_status() == job.JobStatus.QUEUED + ): + logger.info("but hasn't finished") + return jsonify({"status": res.get_status()}) + + result = res.latest_result() + logger.info("and has finished") + + if res.latest_result().type == result.Type.SUCCESSFUL: + ga_result = result.return_value + logger.info("and has results") + + if len(ga_result) == 2: + standards = ga_result[0] + standards_hash = make_array_hash(standards) + + if conn.exists(standards_hash): + logger.info("and hash is already in cache") + ga = conn.get(standards_hash) + if ga: + logger.info("and results in cache") + ga = json.loads(ga) + if ga.get("result"): + return jsonify({"result": ga.get("result")}) + else: + logger.error( + "Finished job does not have a result object, this is a bug!" + ) + abort(500, "this is a bug, please raise a ticket") + + elif res.latest_result().type == result.Type.FAILED: + logger.error(res.latest_result().exc_string) + abort(500) + else: + logger.warning(f"job stopped? 
{res.latest_result().type}") + abort(500) @app.route("/rest/v1/standards", methods=["GET"]) @cache.cached(timeout=50) def standards() -> Any: - database = db.Node_collection() - standards = database.standards() - if standards is None: - neo4j_not_running_rejection() - return standards + redis_url = os.getenv("REDIS_URL", "redis://localhost:6379") + conn = redis.from_url(redis_url) + standards = conn.get("NodeNames") + if standards: + return standards + else: + database = db.Node_collection() + standards = database.standards() + if standards is None: + neo4j_not_running_rejection() + conn.set("NodeNames", flask_json.dumps(standards)) + return standards @app.route("/rest/v1/text_search", methods=["GET"]) diff --git a/application/worker.py b/application/worker.py new file mode 100644 index 000000000..2fbdf1f64 --- /dev/null +++ b/application/worker.py @@ -0,0 +1,22 @@ +import os +import redis +from rq import Worker, Queue, Connection +from application.database import db +import logging +from application.cmd.cre_main import db_connect + +logging.basicConfig() +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) + +listen = ["high", "default", "low"] + + +def start_worker(cache: str): + redis_url = os.getenv("REDIS_URL", "redis://localhost:6379") + conn = redis.from_url(redis_url) + logger.info(f"Worker Starting") + database = db_connect(path=cache) + with Connection(conn): + worker = Worker(map(Queue, listen)) + worker.work() diff --git a/cre.py b/cre.py index 9593505e6..ac072821f 100644 --- a/cre.py +++ b/cre.py @@ -192,7 +192,11 @@ def main() -> None: action="store_true", help="populate the neo4j db", ) - + parser.add_argument( + "--start_worker", + action="store_true", + help="start redis queue worker", + ) args = parser.parse_args() from application.cmd import cre_main diff --git a/requirements.txt b/requirements.txt index 786a3168a..3927281eb 100644 --- a/requirements.txt +++ b/requirements.txt @@ -32,6 +32,7 @@ simplify_docx==0.1.2 SQLAlchemy 
compliance-trestle nose==1.3.7 +mypy numpy==1.23.0 neo4j neomodel==5.1.2 @@ -84,6 +85,8 @@ requests==2.27.1 requests-oauthlib==1.3.1 rfc3986==1.5.0 rsa==4.7 +rq==1.15.1 +redis==5.0.1 ruamel.yaml==0.17.21 ruamel.yaml.clib==0.2.7 scikit-learn==1.2.2 From efa968c60c2350c9bd1335b474818ed3eb406b25 Mon Sep 17 00:00:00 2001 From: John Harvey <10814889+john681611@users.noreply.github.com> Date: Mon, 23 Oct 2023 19:09:51 +0100 Subject: [PATCH 66/75] Split ga response (#434) * add time metrics * map analysis through redis * move hash to utils and make db method write to redis optionally * lint * Added: Front end support for weaker links calls * Added: DB seperation for smaller inital response and cache extras * Added: DB side of part split response logic * Refactor and tests --------- Co-authored-by: Spyros --- .gitignore | 3 +- application/database/db.py | 47 ++- application/frontend/src/const.ts | 2 + .../src/pages/GapAnalysis/GapAnalysis.tsx | 86 +++--- application/frontend/src/types.ts | 2 + application/tests/db_test.py | 271 +++++++++++++++++- application/tests/web_main_test.py | 139 ++++++++- application/web/web_main.py | 20 +- 8 files changed, 491 insertions(+), 79 deletions(-) diff --git a/.gitignore b/.gitignore index a1f5606a8..ace3d96b2 100644 --- a/.gitignore +++ b/.gitignore @@ -34,4 +34,5 @@ coverage/ standards_cache.sqlite ### Neo4j -neo4j/ \ No newline at end of file +neo4j/ +.neo4j/ \ No newline at end of file diff --git a/application/database/db.py b/application/database/db.py index 0a4a54653..5ffd454f8 100644 --- a/application/database/db.py +++ b/application/database/db.py @@ -494,15 +494,12 @@ def format_path_record(rec): @classmethod def standards(self) -> List[str]: - tools = [] + results = [] for x in db.cypher_query("""MATCH (n:NeoTool) RETURN DISTINCT n.name""")[0]: - tools.extend(x) - standards = [] - for x in db.cypher_query("""MATCH (n:NeoStandard) RETURN DISTINCT n.name""")[ - 0 - ]: # 0 is the results, 1 is the "n.name" param - 
standards.extend(x) - return list(set([x for x in tools] + [x for x in standards])) + results.extend(x) + for x in db.cypher_query("""MATCH (n:NeoStandard) RETURN DISTINCT n.name""")[0]: + results.extend(x) + return list(set(results)) @staticmethod def parse_node(node: NeoDocument) -> cre_defs.Document: @@ -1774,21 +1771,38 @@ def gap_analysis( if base_standard is None: return None grouped_paths = {} + extra_paths_dict = {} + GA_STRONG_UPPER_LIMIT = 2 + for node in base_standard: key = node.id if key not in grouped_paths: - grouped_paths[key] = {"start": node, "paths": {}} + grouped_paths[key] = {"start": node, "paths": {}, "extra": 0} + extra_paths_dict[key] = {"paths": {}} for path in paths: key = path["start"].id end_key = path["end"].id path["score"] = get_path_score(path) del path["start"] - if end_key in grouped_paths[key]["paths"]: - if grouped_paths[key]["paths"][end_key]["score"] > path["score"]: + if path["score"] <= GA_STRONG_UPPER_LIMIT: + if end_key in extra_paths_dict[key]["paths"]: + del extra_paths_dict[key]["paths"][end_key] + grouped_paths[key]["extra"] -= 1 + if end_key in grouped_paths[key]["paths"]: + if grouped_paths[key]["paths"][end_key]["score"] > path["score"]: + grouped_paths[key]["paths"][end_key] = path + else: grouped_paths[key]["paths"][end_key] = path else: - grouped_paths[key]["paths"][end_key] = path + if end_key in grouped_paths[key]["paths"]: + continue + if end_key in extra_paths_dict[key]: + if extra_paths_dict[key]["paths"][end_key]["score"] > path["score"]: + extra_paths_dict[key]["paths"][end_key] = path + else: + extra_paths_dict[key]["paths"][end_key] = path + grouped_paths[key]["extra"] += 1 if ( store_in_cache @@ -1799,6 +1813,11 @@ def gap_analysis( cache_key = make_array_hash(node_names) conn.set(cache_key, flask_json.dumps({"result": grouped_paths})) - return (node_names, {}) + for key in extra_paths_dict: + conn.set( + cache_key + "->" + key, + flask_json.dumps({"result": extra_paths_dict[key]}), + ) + return 
(node_names, {}, {}) - return (node_names, grouped_paths) + return (node_names, grouped_paths, extra_paths_dict) diff --git a/application/frontend/src/const.ts b/application/frontend/src/const.ts index 6f176723f..8022b9138 100644 --- a/application/frontend/src/const.ts +++ b/application/frontend/src/const.ts @@ -37,3 +37,5 @@ export const GRAPH = '/graph'; export const DEEPLINK = '/deeplink'; export const BROWSEROOT = '/root_cres'; export const GAP_ANALYSIS = '/map_analysis'; + +export const GA_STRONG_UPPER_LIMIT = 2; // remember to change this in the Python code too diff --git a/application/frontend/src/pages/GapAnalysis/GapAnalysis.tsx b/application/frontend/src/pages/GapAnalysis/GapAnalysis.tsx index c5b517d4c..4f2d12a5f 100644 --- a/application/frontend/src/pages/GapAnalysis/GapAnalysis.tsx +++ b/application/frontend/src/pages/GapAnalysis/GapAnalysis.tsx @@ -1,20 +1,10 @@ import axios from 'axios'; -import React, { useEffect, useRef, useState } from 'react'; +import React, { useCallback, useEffect, useRef, useState } from 'react'; import { useLocation } from 'react-router-dom'; -import { - Accordion, - Button, - Container, - Dropdown, - DropdownItemProps, - Grid, - Icon, - Label, - Popup, - Table, -} from 'semantic-ui-react'; +import { Button, Dropdown, DropdownItemProps, Icon, Popup, Table } from 'semantic-ui-react'; import { LoadingAndErrorIndicator } from '../../components/LoadingAndErrorIndicator'; +import { GA_STRONG_UPPER_LIMIT } from '../../const'; import { useEnvironment } from '../../hooks'; import { GapAnalysisPathStart } from '../../types'; import { getDocumentDisplayName } from '../../utils'; @@ -51,14 +41,14 @@ function useQuery() { const GetStrength = (score) => { if (score == 0) return 'Direct'; - if (score <= 2) return 'Strong'; + if (score <= GA_STRONG_UPPER_LIMIT) return 'Strong'; if (score >= 20) return 'Weak'; return 'Average'; }; const GetStrengthColor = (score) => { if (score === 0) return 'darkgreen'; - if (score <= 2) return '#93C54B'; + 
if (score <= GA_STRONG_UPPER_LIMIT) return '#93C54B'; if (score >= 20) return 'Red'; return 'Orange'; }; @@ -100,8 +90,10 @@ const GetResultLine = (path, gapAnalysis, key) => {
{GetStrength(0)}: Directly Linked
- {GetStrength(2)}: Closely connected likely to have - majority overlap + + {GetStrength(GA_STRONG_UPPER_LIMIT)} + + : Closely connected likely to have majority overlap
{GetStrength(6)}: Connected likely to have partial overlap @@ -127,21 +119,12 @@ export const GapAnalysis = () => { ); const [gaJob, setgaJob] = useState(''); const [gapAnalysis, setGapAnalysis] = useState>(); - const [activeIndex, SetActiveIndex] = useState(); const [loadingStandards, setLoadingStandards] = useState(false); const [loadingGA, setLoadingGA] = useState(false); const [error, setError] = useState(null); const { apiUrl } = useEnvironment(); const timerIdRef = useRef(); - const GetStrongPathsCount = (paths) => - Math.max( - Object.values(paths).filter( - (x) => GetStrength(x.score) === 'Strong' || GetStrength(x.score) === 'Direct' - ).length, - 3 - ); - useEffect(() => { const fetchData = async () => { const result = await axios.get(`${apiUrl}/standards`); @@ -225,11 +208,20 @@ export const GapAnalysis = () => { }); }, [BaseStandard, CompareStandard, setGapAnalysis, setLoadingGA, setError]); - const handleAccordionClick = (e, titleProps) => { - const { index } = titleProps; - const newIndex = activeIndex === index ? -1 : index; - SetActiveIndex(newIndex); - }; + const getWeakLinks = useCallback( + async (key) => { + if (!gapAnalysis) return; + const result = await axios.get( + `${apiUrl}/map_analysis_weak_links?standard=${BaseStandard}&standard=${CompareStandard}&key=${key}` + ); + if (result.data.result) { + gapAnalysis[key].weakLinks = result.data.result.paths; + setGapAnalysis(undefined); //THIS HAS TO BE THE WRONG WAY OF DOING THIS + setGapAnalysis(gapAnalysis); + } + }, + [gapAnalysis, setGapAnalysis] + ); return (
@@ -299,29 +291,19 @@ export const GapAnalysis = () => { {Object.values(gapAnalysis[key].paths) .sort((a, b) => a.score - b.score) - .slice(0, GetStrongPathsCount(gapAnalysis[key].paths)) .map((path) => GetResultLine(path, gapAnalysis, key))} - {Object.keys(gapAnalysis[key].paths).length > 3 && ( - - - - - - {Object.values(gapAnalysis[key].paths) - .sort((a, b) => a.score - b.score) - .slice( - GetStrongPathsCount(gapAnalysis[key].paths), - Object.keys(gapAnalysis[key].paths).length - ) - .map((path) => GetResultLine(path, gapAnalysis, key))} - - + {gapAnalysis[key].weakLinks && + Object.values(gapAnalysis[key].weakLinks) + .sort((a, b) => a.score - b.score) + .map((path) => GetResultLine(path, gapAnalysis, key))} + {gapAnalysis[key].extra > 0 && !gapAnalysis[key].weakLinks && ( + + )} + {Object.keys(gapAnalysis[key].paths).length === 0 && gapAnalysis[key].extra === 0 && ( + No links Found )} - {Object.keys(gapAnalysis[key].paths).length === 0 && No links Found} ))} diff --git a/application/frontend/src/types.ts b/application/frontend/src/types.ts index 60760f837..e373a6d08 100644 --- a/application/frontend/src/types.ts +++ b/application/frontend/src/types.ts @@ -35,4 +35,6 @@ interface GapAnalysisPath { export interface GapAnalysisPathStart { start: Document; paths: Record; + extra: number; + weakLinks: Record; } diff --git a/application/tests/db_test.py b/application/tests/db_test.py index 6ab6402c9..5142d5011 100644 --- a/application/tests/db_test.py +++ b/application/tests/db_test.py @@ -3,13 +3,14 @@ import os import tempfile import unittest +from unittest import mock from unittest.mock import patch import uuid from copy import copy, deepcopy from pprint import pprint -from pydoc import doc from typing import Any, Dict, List, Union -import neo4j +import redis +from flask import json as flask_json import yaml from application import create_app, sqla # type: ignore @@ -1155,7 +1156,7 @@ def test_gap_analysis_no_nodes(self, gap_mock): gap_mock.return_value = ([], 
[]) self.assertEqual( - db.gap_analysis(collection.neo_db, ["a", "b"]), (["a", "b"], {}) + db.gap_analysis(collection.neo_db, ["a", "b"]), (["a", "b"], {}, {}) ) @patch.object(db.NEO_DB, "gap_analysis") @@ -1166,7 +1167,11 @@ def test_gap_analysis_no_links(self, gap_mock): gap_mock.return_value = ([defs.CRE(name="bob", id=1)], []) self.assertEqual( db.gap_analysis(collection.neo_db, ["a", "b"]), - (["a", "b"], {1: {"start": defs.CRE(name="bob", id=1), "paths": {}}}), + ( + ["a", "b"], + {1: {"start": defs.CRE(name="bob", id=1), "paths": {}, "extra": 0}}, + {1: {"paths": {}}}, + ), ) @patch.object(db.NEO_DB, "gap_analysis") @@ -1203,9 +1208,61 @@ def test_gap_analysis_one_link(self, gap_mock): "paths": { 2: {"end": defs.CRE(name="bob", id=2), "path": path, "score": 0} }, + "extra": 0, + } + }, + {1: {"paths": {}}}, + ) + self.assertEqual(db.gap_analysis(collection.neo_db, ["a", "b"]), expected) + + @patch.object(db.NEO_DB, "gap_analysis") + def test_gap_analysis_one_weak_link(self, gap_mock): + collection = db.Node_collection() + collection.neo_db.connected = True + path = [ + { + "end": defs.CRE(name="bob", id=1), + "relationship": "LINKED_TO", + "start": defs.CRE(name="bob", id="a"), + }, + { + "end": defs.CRE(name="bob", id=2), + "relationship": "RELATED", + "start": defs.CRE(name="bob", id=1), + }, + { + "end": defs.CRE(name="bob", id=1), + "relationship": "RELATED", + "start": defs.CRE(name="bob", id=2), + }, + { + "end": defs.CRE(name="bob", id=3), + "relationship": "LINKED_TO", + "start": defs.CRE(name="bob", id=2), + }, + ] + gap_mock.return_value = ( + [defs.CRE(name="bob", id=1)], + [ + { + "start": defs.CRE(name="bob", id=1), + "end": defs.CRE(name="bob", id=2), + "path": path, + } + ], + ) + expected = ( + ["a", "b"], + {1: {"start": defs.CRE(name="bob", id=1), "paths": {}, "extra": 1}}, + { + 1: { + "paths": { + 2: {"end": defs.CRE(name="bob", id=2), "path": path, "score": 4} + } } }, ) + self.assertEqual(db.gap_analysis(collection.neo_db, ["a", "b"]), 
expected) @patch.object(db.NEO_DB, "gap_analysis") @@ -1259,8 +1316,75 @@ def test_gap_analysis_duplicate_link_path_existing_lower(self, gap_mock): "paths": { 2: {"end": defs.CRE(name="bob", id=2), "path": path, "score": 0} }, - } + "extra": 0, + }, }, + {1: {"paths": {}}}, + ) + self.assertEqual(db.gap_analysis(collection.neo_db, ["a", "b"]), expected) + + @patch.object(db.NEO_DB, "gap_analysis") + def test_gap_analysis_duplicate_link_path_existing_lower_new_in_extras( + self, gap_mock + ): + collection = db.Node_collection() + collection.neo_db.connected = True + path = [ + { + "end": defs.CRE(name="bob", id=1), + "relationship": "LINKED_TO", + "start": defs.CRE(name="bob", id="a"), + }, + { + "end": defs.CRE(name="bob", id=2), + "relationship": "LINKED_TO", + "start": defs.CRE(name="bob", id="a"), + }, + ] + path2 = [ + { + "end": defs.CRE(name="bob", id=1), + "relationship": "LINKED_TO", + "start": defs.CRE(name="bob", id="a"), + }, + { + "end": defs.CRE(name="bob", id=2), + "relationship": "RELATED", + "start": defs.CRE(name="bob", id="a"), + }, + { + "end": defs.CRE(name="bob", id=2), + "relationship": "RELATED", + "start": defs.CRE(name="bob", id="a"), + }, + ] + gap_mock.return_value = ( + [defs.CRE(name="bob", id=1)], + [ + { + "start": defs.CRE(name="bob", id=1), + "end": defs.CRE(name="bob", id=2), + "path": path, + }, + { + "start": defs.CRE(name="bob", id=1), + "end": defs.CRE(name="bob", id=2), + "path": path2, + }, + ], + ) + expected = ( + ["a", "b"], + { + 1: { + "start": defs.CRE(name="bob", id=1), + "paths": { + 2: {"end": defs.CRE(name="bob", id=2), "path": path, "score": 0} + }, + "extra": 0, + }, + }, + {1: {"paths": {}}}, ) self.assertEqual(db.gap_analysis(collection.neo_db, ["a", "b"]), expected) @@ -1315,11 +1439,148 @@ def test_gap_analysis_duplicate_link_path_existing_higher(self, gap_mock): "paths": { 2: {"end": defs.CRE(name="bob", id=2), "path": path, "score": 0} }, + "extra": 0, } }, + {1: {"paths": {}}}, ) 
self.assertEqual(db.gap_analysis(collection.neo_db, ["a", "b"]), expected) + @patch.object(db.NEO_DB, "gap_analysis") + def test_gap_analysis_duplicate_link_path_existing_higher_and_in_extras( + self, gap_mock + ): + collection = db.Node_collection() + collection.neo_db.connected = True + path = [ + { + "end": defs.CRE(name="bob", id=1), + "relationship": "LINKED_TO", + "start": defs.CRE(name="bob", id="a"), + }, + { + "end": defs.CRE(name="bob", id=2), + "relationship": "LINKED_TO", + "start": defs.CRE(name="bob", id="a"), + }, + ] + path2 = [ + { + "end": defs.CRE(name="bob", id=1), + "relationship": "LINKED_TO", + "start": defs.CRE(name="bob", id="a"), + }, + { + "end": defs.CRE(name="bob", id=2), + "relationship": "RELATED", + "start": defs.CRE(name="bob", id="a"), + }, + { + "end": defs.CRE(name="bob", id=2), + "relationship": "RELATED", + "start": defs.CRE(name="bob", id="a"), + }, + ] + gap_mock.return_value = ( + [defs.CRE(name="bob", id=1)], + [ + { + "start": defs.CRE(name="bob", id=1), + "end": defs.CRE(name="bob", id=2), + "path": path2, + }, + { + "start": defs.CRE(name="bob", id=1), + "end": defs.CRE(name="bob", id=2), + "path": path, + }, + ], + ) + expected = ( + ["a", "b"], + { + 1: { + "start": defs.CRE(name="bob", id=1), + "paths": { + 2: {"end": defs.CRE(name="bob", id=2), "path": path, "score": 0} + }, + "extra": 0, + } + }, + {1: {"paths": {}}}, + ) + self.assertEqual(db.gap_analysis(collection.neo_db, ["a", "b"]), expected) + + @patch.object(redis, "from_url") + @patch.object(db.NEO_DB, "gap_analysis") + def test_gap_analysis_dump_to_cache(self, gap_mock, redis_conn_mock): + collection = db.Node_collection() + collection.neo_db.connected = True + path = [ + { + "end": defs.CRE(name="bob", id=1), + "relationship": "LINKED_TO", + "start": defs.CRE(name="bob", id="a"), + }, + { + "end": defs.CRE(name="bob", id=2), + "relationship": "RELATED", + "start": defs.CRE(name="bob", id=1), + }, + { + "end": defs.CRE(name="bob", id=1), + "relationship": 
"RELATED", + "start": defs.CRE(name="bob", id=2), + }, + { + "end": defs.CRE(name="bob", id=3), + "relationship": "LINKED_TO", + "start": defs.CRE(name="bob", id=2), + }, + ] + gap_mock.return_value = ( + [defs.CRE(name="bob", id="a")], + [ + { + "start": defs.CRE(name="bob", id="a"), + "end": defs.CRE(name="bob", id="b"), + "path": path, + } + ], + ) + + expected_response = ( + ["a", "b"], + {"a": {"start": defs.CRE(name="bob", id="a"), "paths": {}, "extra": 1}}, + { + "a": { + "paths": { + "b": { + "end": defs.CRE(name="bob", id="b"), + "path": path, + "score": 4, + } + } + } + }, + ) + response = db.gap_analysis(collection.neo_db, ["a", "b"], True) + + self.assertEqual(response, (expected_response[0], {}, {})) + + redis_conn_mock.return_value.set.assert_has_calls( + [ + mock.call( + "d8160c9b3dc20d4e931aeb4f45262155", + flask_json.dumps({"result": expected_response[1]}), + ), + mock.call( + "d8160c9b3dc20d4e931aeb4f45262155->a", + flask_json.dumps({"result": expected_response[2]["a"]}), + ), + ] + ) + def test_neo_db_parse_node_code(self): name = "name" id = "id" diff --git a/application/tests/web_main_test.py b/application/tests/web_main_test.py index 30f870c6f..2367fbc9c 100644 --- a/application/tests/web_main_test.py +++ b/application/tests/web_main_test.py @@ -1,18 +1,22 @@ import re import json -import logging -import os -import tempfile import unittest -from pprint import pprint -from typing import Any, Dict, List +from unittest.mock import patch + +import redis +import rq from application import create_app, sqla # type: ignore from application.database import db from application.defs import cre_defs as defs from application.defs import osib_defs from application.web import web_main -from application.utils import mdutils + + +class MockJob: + @property + def id(self): + return "ABC" class TestMain(unittest.TestCase): @@ -568,3 +572,126 @@ def test_smartlink(self) -> None: location = head[1] self.assertEqual(location, "") self.assertEqual(404, 
response.status_code) + + @patch.object(redis, "from_url") + def test_gap_analysis_from_cache_full_response(self, redis_conn_mock) -> None: + expected = {"result": "hello"} + redis_conn_mock.return_value.exists.return_value = True + redis_conn_mock.return_value.get.return_value = json.dumps(expected) + with self.app.test_client() as client: + response = client.get( + "/rest/v1/map_analysis?standard=aaa&standard=bbb", + headers={"Content-Type": "application/json"}, + ) + self.assertEqual(200, response.status_code) + self.assertEqual(expected, json.loads(response.data)) + + @patch.object(rq.Queue, "enqueue_call") + @patch.object(redis, "from_url") + def test_gap_analysis_from_cache_job_id( + self, redis_conn_mock, enqueue_call_mock + ) -> None: + expected = {"job_id": "hello"} + redis_conn_mock.return_value.exists.return_value = True + redis_conn_mock.return_value.get.return_value = json.dumps(expected) + with self.app.test_client() as client: + response = client.get( + "/rest/v1/map_analysis?standard=aaa&standard=bbb", + headers={"Content-Type": "application/json"}, + ) + self.assertEqual(200, response.status_code) + self.assertEqual(expected, json.loads(response.data)) + self.assertFalse(enqueue_call_mock.called) + + @patch.object(db, "Node_collection") + @patch.object(rq.Queue, "enqueue_call") + @patch.object(redis, "from_url") + def test_gap_analysis_create_job_id( + self, redis_conn_mock, enqueue_call_mock, db_mock + ) -> None: + expected = {"job_id": "ABC"} + redis_conn_mock.return_value.exists.return_value = False + enqueue_call_mock.return_value = MockJob() + with self.app.test_client() as client: + response = client.get( + "/rest/v1/map_analysis?standard=aaa&standard=bbb", + headers={"Content-Type": "application/json"}, + ) + self.assertEqual(200, response.status_code) + self.assertEqual(expected, json.loads(response.data)) + enqueue_call_mock.assert_called_with( + db.gap_analysis, + kwargs={ + "neo_db": db_mock().neo_db, + "node_names": ["aaa", "bbb"], + 
"store_in_cache": True, + "cache_key": "7aa45d88f69a131890f8e4a769bbb07b", + }, + ) + redis_conn_mock.return_value.set.assert_called_with( + "7aa45d88f69a131890f8e4a769bbb07b", '{"job_id": "ABC", "result": ""}' + ) + + @patch.object(redis, "from_url") + def test_standards_from_cache(self, redis_conn_mock) -> None: + expected = ["A", "B"] + redis_conn_mock.return_value.exists.return_value = True + redis_conn_mock.return_value.get.return_value = json.dumps(expected) + with self.app.test_client() as client: + response = client.get( + "/rest/v1/standards", + headers={"Content-Type": "application/json"}, + ) + self.assertEqual(200, response.status_code) + self.assertEqual(expected, json.loads(response.data)) + + @patch.object(redis, "from_url") + @patch.object(db, "Node_collection") + def test_standards_from_db(self, node_mock, redis_conn_mock) -> None: + expected = ["A", "B"] + redis_conn_mock.return_value.get.return_value = None + node_mock.return_value.standards.return_value = expected + with self.app.test_client() as client: + response = client.get( + "/rest/v1/standards", + headers={"Content-Type": "application/json"}, + ) + self.assertEqual(200, response.status_code) + self.assertEqual(expected, json.loads(response.data)) + + @patch.object(redis, "from_url") + @patch.object(db, "Node_collection") + def test_standards_from_db_off(self, node_mock, redis_conn_mock) -> None: + expected = { + "message": "Backend services connected to this feature are not running at the moment." 
+ } + redis_conn_mock.return_value.get.return_value = None + node_mock.return_value.standards.return_value = None + with self.app.test_client() as client: + response = client.get( + "/rest/v1/standards", + headers={"Content-Type": "application/json"}, + ) + self.assertEqual(500, response.status_code) + self.assertEqual(expected, json.loads(response.data)) + + def test_gap_analysis_weak_links_no_cache(self) -> None: + with self.app.test_client() as client: + response = client.get( + "/rest/v1/map_analysis_weak_links?standard=aaa&standard=bbb&key=ccc`", + headers={"Content-Type": "application/json"}, + ) + self.assertEqual(404, response.status_code) + + @patch.object(redis, "from_url") + def test_gap_analysis_weak_links_response(self, redis_conn_mock) -> None: + expected = {"result": "hello"} + redis_conn_mock.return_value.exists.return_value = True + redis_conn_mock.return_value.get.return_value = json.dumps(expected) + with self.app.test_client() as client: + response = client.get( + "/rest/v1/map_analysis_weak_links?standard=aaa&standard=bbb&key=ccc`", + headers={"Content-Type": "application/json"}, + ) + self.assertEqual(200, response.status_code) + self.assertEqual(expected, json.loads(response.data)) diff --git a/application/web/web_main.py b/application/web/web_main.py index 13d71dbc7..9361ad440 100644 --- a/application/web/web_main.py +++ b/application/web/web_main.py @@ -251,6 +251,24 @@ def gap_analysis() -> Any: return jsonify({"job_id": gap_analysis_job.id}) +@app.route("/rest/v1/map_analysis_weak_links", methods=["GET"]) +@cache.cached(timeout=50, query_string=True) +def gap_analysis_weak_links() -> Any: + standards = request.args.getlist("standard") + key = request.args.get("key") + redis_url = os.getenv("REDIS_URL", "redis://localhost:6379") + conn = redis.from_url(redis_url) + standards_hash = make_array_hash(standards) + cache_key = standards_hash + "->" + key + if conn.exists(cache_key): + gap_analysis_results = conn.get(cache_key) + if 
gap_analysis_results: + gap_analysis_dict = json.loads(gap_analysis_results) + if gap_analysis_dict.get("result"): + return jsonify({"result": gap_analysis_dict.get("result")}) + abort(404, "No such Cache") + + @app.route("/rest/v1/ma_job_results", methods=["GET"]) def fetch_job() -> Any: logger.info("fetching job results") @@ -321,7 +339,7 @@ def standards() -> Any: database = db.Node_collection() standards = database.standards() if standards is None: - neo4j_not_running_rejection() + return neo4j_not_running_rejection() conn.set("NodeNames", flask_json.dumps(standards)) return standards From 605aef169dcc975e20220b1592d9061f2d99bc23 Mon Sep 17 00:00:00 2001 From: Spyros Date: Mon, 23 Oct 2023 19:46:06 +0100 Subject: [PATCH 67/75] skip returning ids for failed jobs --- application/web/web_main.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/application/web/web_main.py b/application/web/web_main.py index 9361ad440..351a3089a 100644 --- a/application/web/web_main.py +++ b/application/web/web_main.py @@ -234,8 +234,15 @@ def gap_analysis() -> Any: if gap_analysis_dict.get("result"): return jsonify({"result": gap_analysis_dict.get("result")}) elif gap_analysis_dict.get("job_id"): - return jsonify({"job_id": gap_analysis_dict.get("job_id")}) - + try: + res = job.Job.fetch(id=gap_analysis_dict.get("job_id"), connection=conn) + except exceptions.NoSuchJobError as nje: + abort(404, "No such job") + if res.get_status() != job.JobStatus.FAILED and\ + res.get_status() == job.JobStatus.STOPPED and\ + res.get_status() == job.JobStatus.CANCELED: + logger.info("gap analysis job id already exists, returning early") + return jsonify({"job_id": gap_analysis_dict.get("job_id")}) q = Queue(connection=conn) gap_analysis_job = q.enqueue_call( db.gap_analysis, From 6594529d40d96fcaa7c8587e621ead3edf6bc32d Mon Sep 17 00:00:00 2001 From: Spyros Date: Mon, 23 Oct 2023 20:44:43 +0100 Subject: [PATCH 68/75] add optional tls to redis, heroku requirement --- 
application/database/db.py | 1 + application/utils/redis.py | 17 +++++++++++++++++ application/web/web_main.py | 28 ++++++++++++++-------------- application/worker.py | 4 ++-- 4 files changed, 34 insertions(+), 16 deletions(-) create mode 100644 application/utils/redis.py diff --git a/application/database/db.py b/application/database/db.py index 5ffd454f8..6fc4e802f 100644 --- a/application/database/db.py +++ b/application/database/db.py @@ -1808,6 +1808,7 @@ def gap_analysis( store_in_cache ): # lightweight memory option to not return potentially huge object and instead store in a cache, # in case this is called via worker, we save both this and the caller memory by avoiding duplicate object in mem + conn = redis.from_url(os.getenv("REDIS_URL", "redis://localhost:6379")) if cache_key == "": cache_key = make_array_hash(node_names) diff --git a/application/utils/redis.py b/application/utils/redis.py new file mode 100644 index 000000000..769d9d5f6 --- /dev/null +++ b/application/utils/redis.py @@ -0,0 +1,17 @@ +import redis +import os +from urllib.parse import urlparse + + +def connect(): + redis_url = os.getenv("REDIS_URL", "redis://localhost:6379") + + url = urlparse(redis_url) + r = redis.Redis( + host=url.hostname, + port=url.port, + password=url.password, + ssl=True, + ssl_cert_reqs=None, + ) + return r diff --git a/application/web/web_main.py b/application/web/web_main.py index 351a3089a..90b192838 100644 --- a/application/web/web_main.py +++ b/application/web/web_main.py @@ -7,8 +7,8 @@ import pathlib import urllib.parse from typing import Any -from application.utils import oscal_utils -import redis +from application.utils import oscal_utils, redis + from rq import Worker, Queue, Connection, job, exceptions from application import cache @@ -224,8 +224,7 @@ def find_document_by_tag() -> Any: def gap_analysis() -> Any: database = db.Node_collection() standards = request.args.getlist("standard") - redis_url = os.getenv("REDIS_URL", "redis://localhost:6379") - 
conn = redis.from_url(redis_url) + conn = redis.connect() standards_hash = make_array_hash(standards) if conn.exists(standards_hash): gap_analysis_results = conn.get(standards_hash) @@ -235,12 +234,16 @@ def gap_analysis() -> Any: return jsonify({"result": gap_analysis_dict.get("result")}) elif gap_analysis_dict.get("job_id"): try: - res = job.Job.fetch(id=gap_analysis_dict.get("job_id"), connection=conn) + res = job.Job.fetch( + id=gap_analysis_dict.get("job_id"), connection=conn + ) except exceptions.NoSuchJobError as nje: abort(404, "No such job") - if res.get_status() != job.JobStatus.FAILED and\ - res.get_status() == job.JobStatus.STOPPED and\ - res.get_status() == job.JobStatus.CANCELED: + if ( + res.get_status() != job.JobStatus.FAILED + and res.get_status() == job.JobStatus.STOPPED + and res.get_status() == job.JobStatus.CANCELED + ): logger.info("gap analysis job id already exists, returning early") return jsonify({"job_id": gap_analysis_dict.get("job_id")}) q = Queue(connection=conn) @@ -263,8 +266,7 @@ def gap_analysis() -> Any: def gap_analysis_weak_links() -> Any: standards = request.args.getlist("standard") key = request.args.get("key") - redis_url = os.getenv("REDIS_URL", "redis://localhost:6379") - conn = redis.from_url(redis_url) + conn = redis.connect() standards_hash = make_array_hash(standards) cache_key = standards_hash + "->" + key if conn.exists(cache_key): @@ -280,8 +282,7 @@ def gap_analysis_weak_links() -> Any: def fetch_job() -> Any: logger.info("fetching job results") jobid = request.args.get("id") - redis_url = os.getenv("REDIS_URL", "redis://localhost:6379") - conn = redis.from_url(redis_url) + conn = redis.connect() try: res = job.Job.fetch(id=jobid, connection=conn) except exceptions.NoSuchJobError as nje: @@ -337,8 +338,7 @@ def fetch_job() -> Any: @app.route("/rest/v1/standards", methods=["GET"]) @cache.cached(timeout=50) def standards() -> Any: - redis_url = os.getenv("REDIS_URL", "redis://localhost:6379") - conn = 
redis.from_url(redis_url) + conn = redis.connect() standards = conn.get("NodeNames") if standards: return standards diff --git a/application/worker.py b/application/worker.py index 2fbdf1f64..104e05ebe 100644 --- a/application/worker.py +++ b/application/worker.py @@ -4,6 +4,7 @@ from application.database import db import logging from application.cmd.cre_main import db_connect +from application.utils import redis logging.basicConfig() logger = logging.getLogger(__name__) @@ -13,8 +14,7 @@ def start_worker(cache: str): - redis_url = os.getenv("REDIS_URL", "redis://localhost:6379") - conn = redis.from_url(redis_url) + conn = redis.connect() logger.info(f"Worker Starting") database = db_connect(path=cache) with Connection(conn): From 562a48d582cda7076d56487c1bdbf3fd83cfb8f6 Mon Sep 17 00:00:00 2001 From: Spyros Date: Mon, 23 Oct 2023 20:57:30 +0100 Subject: [PATCH 69/75] add optional tls to redis, heroku requirement --- application/database/db.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/application/database/db.py b/application/database/db.py index 6fc4e802f..e68292890 100644 --- a/application/database/db.py +++ b/application/database/db.py @@ -1,6 +1,6 @@ from flask import json as flask_json import json -import redis +from application.utils import redis from neomodel import ( config, StructuredNode, @@ -1809,7 +1809,7 @@ def gap_analysis( ): # lightweight memory option to not return potentially huge object and instead store in a cache, # in case this is called via worker, we save both this and the caller memory by avoiding duplicate object in mem - conn = redis.from_url(os.getenv("REDIS_URL", "redis://localhost:6379")) + conn = redis.connect() if cache_key == "": cache_key = make_array_hash(node_names) From a499b06453de0bcd33b23a2d8ccd55bbadf06253 Mon Sep 17 00:00:00 2001 From: Spyros Date: Mon, 23 Oct 2023 21:16:25 +0100 Subject: [PATCH 70/75] add optional tls to redis, heroku requirement --- application/web/web_main.py | 3 ++- 1 file 
changed, 2 insertions(+), 1 deletion(-) diff --git a/application/web/web_main.py b/application/web/web_main.py index 90b192838..5fc9f6611 100644 --- a/application/web/web_main.py +++ b/application/web/web_main.py @@ -326,7 +326,8 @@ def fetch_job() -> Any: "Finished job does not have a result object, this is a bug!" ) abort(500, "this is a bug, please raise a ticket") - + + reutrn jsonify({"status": res.get_status()}) elif res.latest_result().type == result.Type.FAILED: logger.error(res.latest_result().exc_string) abort(500) From 0d4524e705ac3fba948ae01914b05ed3d53bd2a4 Mon Sep 17 00:00:00 2001 From: Spyros Date: Mon, 23 Oct 2023 21:34:26 +0100 Subject: [PATCH 71/75] add optional tls to redis, heroku requirement --- application/web/web_main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/application/web/web_main.py b/application/web/web_main.py index 5fc9f6611..8695aa740 100644 --- a/application/web/web_main.py +++ b/application/web/web_main.py @@ -327,7 +327,7 @@ def fetch_job() -> Any: ) abort(500, "this is a bug, please raise a ticket") - reutrn jsonify({"status": res.get_status()}) + return jsonify({"status": res.get_status()}) elif res.latest_result().type == result.Type.FAILED: logger.error(res.latest_result().exc_string) abort(500) From a086e4f9ab9465f49926f667f6b7edec70b7b315 Mon Sep 17 00:00:00 2001 From: Spyros Date: Mon, 23 Oct 2023 21:45:19 +0100 Subject: [PATCH 72/75] add optional tls to redis, heroku requirement --- application/web/web_main.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/application/web/web_main.py b/application/web/web_main.py index 8695aa740..1eff72b4c 100644 --- a/application/web/web_main.py +++ b/application/web/web_main.py @@ -326,7 +326,8 @@ def fetch_job() -> Any: "Finished job does not have a result object, this is a bug!" 
) abort(500, "this is a bug, please raise a ticket") - + from pprint import pprint + pprint(ga_result) return jsonify({"status": res.get_status()}) elif res.latest_result().type == result.Type.FAILED: logger.error(res.latest_result().exc_string) From cda463f8aa0b3afd84a9094c0d865178274c4c96 Mon Sep 17 00:00:00 2001 From: Spyros Date: Mon, 23 Oct 2023 21:54:11 +0100 Subject: [PATCH 73/75] add optional tls to redis, heroku requirement --- application/web/web_main.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/application/web/web_main.py b/application/web/web_main.py index 1eff72b4c..75deab6dd 100644 --- a/application/web/web_main.py +++ b/application/web/web_main.py @@ -309,7 +309,7 @@ def fetch_job() -> Any: ga_result = result.return_value logger.info("and has results") - if len(ga_result) == 2: + if len(ga_result) > 1: standards = ga_result[0] standards_hash = make_array_hash(standards) @@ -326,8 +326,6 @@ def fetch_job() -> Any: "Finished job does not have a result object, this is a bug!" 
) abort(500, "this is a bug, please raise a ticket") - from pprint import pprint - pprint(ga_result) return jsonify({"status": res.get_status()}) elif res.latest_result().type == result.Type.FAILED: logger.error(res.latest_result().exc_string) From 8111c40b9397a4310a8da790ea1e6d2ba705eb68 Mon Sep 17 00:00:00 2001 From: Spyros Date: Tue, 24 Oct 2023 19:36:28 +0100 Subject: [PATCH 74/75] use db to cache gap analysis results instead of redis (#435) * use db to cache gap analysis results instead of redis * lint * typo * fix 1 test, fix bugs, make cache key into its own function * lint * fix-mock * migration * Fix: bad ref * fix tests * lint * minor changes * lint --------- Co-authored-by: john681611 --- application/database/db.py | 50 ++++++++++--- application/tests/db_test.py | 22 ++---- application/tests/web_main_test.py | 24 ++++-- application/utils/hash.py | 4 + application/utils/redis.py | 21 +++--- application/web/web_main.py | 75 +++++++++++-------- ...2946f9_cache_gap_analysis_results_in_db.py | 72 ++++++++++++++++++ 7 files changed, 192 insertions(+), 76 deletions(-) create mode 100644 migrations/versions/5029c02946f9_cache_gap_analysis_results_in_db.py diff --git a/application/database/db.py b/application/database/db.py index e68292890..36ca3fcbd 100644 --- a/application/database/db.py +++ b/application/database/db.py @@ -28,7 +28,7 @@ import uuid from application.utils.gap_analysis import get_path_score -from application.utils.hash import make_array_hash +from application.utils.hash import make_array_hash, make_cache_key from .. 
import sqla # type: ignore @@ -172,6 +172,13 @@ class Embeddings(BaseModel): # type: ignore ) +class GapAnalysisResults(BaseModel): + __tablename__ = "gap_analysis_results" + cache_key = sqla.Column(sqla.String, primary_key=True) + ga_object = sqla.Column(sqla.String) + __table_args__ = (sqla.UniqueConstraint(cache_key, name="unique_cache_key_field"),) + + class RelatedRel(StructuredRel): pass @@ -425,7 +432,6 @@ def link_CRE_to_Node(self, CRE_id, node_id, link_type): def gap_analysis(self, name_1, name_2): base_standard = NeoStandard.nodes.filter(name=name_1) denylist = ["Cross-cutting concerns"] - from pprint import pprint from datetime import datetime t1 = datetime.now() @@ -442,8 +448,6 @@ def gap_analysis(self, name_1, name_2): resolve_objects=True, ) t2 = datetime.now() - pprint(f"path records all took {t2-t1}") - pprint(path_records_all.__len__()) path_records, _ = db.cypher_query( """ OPTIONAL MATCH (BaseStandard:NeoStandard {name: $name1}) @@ -485,9 +489,6 @@ def format_path_record(rec): "path": [format_segment(seg, rec.nodes) for seg in rec.relationships], } - pprint( - f"path records all took {t2-t1} path records took {t3 - t2}, total: {t3 - t1}" - ) return [NEO_DB.parse_node(rec) for rec in base_standard], [ format_path_record(rec[0]) for rec in (path_records + path_records_all) ] @@ -1635,6 +1636,22 @@ def add_embedding( return existing + def get_gap_analysis_result(self, cache_key) -> str: + res = ( + self.session.query(GapAnalysisResults) + .filter(GapAnalysisResults.cache_key == cache_key) + .first() + ) + if res: + return res.ga_object + + def add_gap_analysis_result(self, cache_key: str, ga_object: str): + existing = self.get_gap_analysis_result(cache_key) + if not existing: + res = GapAnalysisResults(cache_key=cache_key, ga_object=ga_object) + self.session.add(res) + self.session.commit() + def dbNodeFromNode(doc: cre_defs.Node) -> Optional[Node]: if doc.doctype == cre_defs.Credoctypes.Standard: @@ -1767,6 +1784,7 @@ def gap_analysis( 
store_in_cache: bool = False, cache_key: str = "", ): + cre_db = Node_collection() base_standard, paths = neo_db.gap_analysis(node_names[0], node_names[1]) if base_standard is None: return None @@ -1809,16 +1827,24 @@ def gap_analysis( ): # lightweight memory option to not return potentially huge object and instead store in a cache, # in case this is called via worker, we save both this and the caller memory by avoiding duplicate object in mem - conn = redis.connect() + # conn = redis.connect() if cache_key == "": cache_key = make_array_hash(node_names) - conn.set(cache_key, flask_json.dumps({"result": grouped_paths})) + # conn.set(cache_key, flask_json.dumps({"result": grouped_paths})) + cre_db.add_gap_analysis_result( + cache_key=cache_key, ga_object=flask_json.dumps({"result": grouped_paths}) + ) + for key in extra_paths_dict: - conn.set( - cache_key + "->" + key, - flask_json.dumps({"result": extra_paths_dict[key]}), + cre_db.add_gap_analysis_result( + cache_key=make_cache_key(node_names, key), + ga_object=flask_json.dumps({"result": extra_paths_dict[key]}), ) + # conn.set( + # cache_key + "->" + key, + # flask_json.dumps({"result": extra_paths_dict[key]}), + # ) return (node_names, {}, {}) return (node_names, grouped_paths, extra_paths_dict) diff --git a/application/tests/db_test.py b/application/tests/db_test.py index 5142d5011..6439e4f5b 100644 --- a/application/tests/db_test.py +++ b/application/tests/db_test.py @@ -1511,9 +1511,8 @@ def test_gap_analysis_duplicate_link_path_existing_higher_and_in_extras( ) self.assertEqual(db.gap_analysis(collection.neo_db, ["a", "b"]), expected) - @patch.object(redis, "from_url") @patch.object(db.NEO_DB, "gap_analysis") - def test_gap_analysis_dump_to_cache(self, gap_mock, redis_conn_mock): + def test_gap_analysis_dump_to_cache(self, gap_mock): collection = db.Node_collection() collection.neo_db.connected = True path = [ @@ -1567,18 +1566,13 @@ def test_gap_analysis_dump_to_cache(self, gap_mock, redis_conn_mock): response 
= db.gap_analysis(collection.neo_db, ["a", "b"], True) self.assertEqual(response, (expected_response[0], {}, {})) - - redis_conn_mock.return_value.set.assert_has_calls( - [ - mock.call( - "d8160c9b3dc20d4e931aeb4f45262155", - flask_json.dumps({"result": expected_response[1]}), - ), - mock.call( - "d8160c9b3dc20d4e931aeb4f45262155->a", - flask_json.dumps({"result": expected_response[2]["a"]}), - ), - ] + self.assertEqual( + collection.get_gap_analysis_result("d8160c9b3dc20d4e931aeb4f45262155"), + flask_json.dumps({"result": expected_response[1]}), + ) + self.assertEqual( + collection.get_gap_analysis_result("d8160c9b3dc20d4e931aeb4f45262155->a"), + flask_json.dumps({"result": expected_response[2]["a"]}), ) def test_neo_db_parse_node_code(self): diff --git a/application/tests/web_main_test.py b/application/tests/web_main_test.py index 2367fbc9c..ccb7ad7d3 100644 --- a/application/tests/web_main_test.py +++ b/application/tests/web_main_test.py @@ -11,6 +11,7 @@ from application.defs import cre_defs as defs from application.defs import osib_defs from application.web import web_main +from application.utils.hash import make_array_hash, make_cache_key class MockJob: @@ -18,6 +19,9 @@ class MockJob: def id(self): return "ABC" + def get_status(self): + return rq.job.JobStatus.STARTED + class TestMain(unittest.TestCase): def tearDown(self) -> None: @@ -574,10 +578,14 @@ def test_smartlink(self) -> None: self.assertEqual(404, response.status_code) @patch.object(redis, "from_url") - def test_gap_analysis_from_cache_full_response(self, redis_conn_mock) -> None: + @patch.object(db, "Node_collection") + def test_gap_analysis_from_cache_full_response( + self, db_mock, redis_conn_mock + ) -> None: expected = {"result": "hello"} redis_conn_mock.return_value.exists.return_value = True redis_conn_mock.return_value.get.return_value = json.dumps(expected) + db_mock.return_value.get_gap_analysis_result.return_value = json.dumps(expected) with self.app.test_client() as client: response = 
client.get( "/rest/v1/map_analysis?standard=aaa&standard=bbb", @@ -586,14 +594,16 @@ def test_gap_analysis_from_cache_full_response(self, redis_conn_mock) -> None: self.assertEqual(200, response.status_code) self.assertEqual(expected, json.loads(response.data)) + @patch.object(rq.job.Job, "fetch") @patch.object(rq.Queue, "enqueue_call") @patch.object(redis, "from_url") def test_gap_analysis_from_cache_job_id( - self, redis_conn_mock, enqueue_call_mock + self, redis_conn_mock, enqueue_call_mock, fetch_mock ) -> None: expected = {"job_id": "hello"} redis_conn_mock.return_value.exists.return_value = True redis_conn_mock.return_value.get.return_value = json.dumps(expected) + fetch_mock.return_value = MockJob() with self.app.test_client() as client: response = client.get( "/rest/v1/map_analysis?standard=aaa&standard=bbb", @@ -610,8 +620,9 @@ def test_gap_analysis_create_job_id( self, redis_conn_mock, enqueue_call_mock, db_mock ) -> None: expected = {"job_id": "ABC"} - redis_conn_mock.return_value.exists.return_value = False + redis_conn_mock.return_value.get.return_value = None enqueue_call_mock.return_value = MockJob() + db_mock.return_value.get_gap_analysis_result.return_value = None with self.app.test_client() as client: response = client.get( "/rest/v1/map_analysis?standard=aaa&standard=bbb", @@ -683,11 +694,10 @@ def test_gap_analysis_weak_links_no_cache(self) -> None: ) self.assertEqual(404, response.status_code) - @patch.object(redis, "from_url") - def test_gap_analysis_weak_links_response(self, redis_conn_mock) -> None: + @patch.object(db, "Node_collection") + def test_gap_analysis_weak_links_response(self, db_mock) -> None: expected = {"result": "hello"} - redis_conn_mock.return_value.exists.return_value = True - redis_conn_mock.return_value.get.return_value = json.dumps(expected) + db_mock.return_value.get_gap_analysis_result.return_value = json.dumps(expected) with self.app.test_client() as client: response = client.get( 
"/rest/v1/map_analysis_weak_links?standard=aaa&standard=bbb&key=ccc`", diff --git a/application/utils/hash.py b/application/utils/hash.py index f305dd829..6b4f7b400 100644 --- a/application/utils/hash.py +++ b/application/utils/hash.py @@ -1,5 +1,9 @@ import hashlib +def make_cache_key(standards: list, key: str) -> str: + return make_array_hash(standards) + "->" + key + + def make_array_hash(array: list): return hashlib.md5(":".join(array).encode("utf-8")).hexdigest() diff --git a/application/utils/redis.py b/application/utils/redis.py index 769d9d5f6..189608967 100644 --- a/application/utils/redis.py +++ b/application/utils/redis.py @@ -5,13 +5,14 @@ def connect(): redis_url = os.getenv("REDIS_URL", "redis://localhost:6379") - - url = urlparse(redis_url) - r = redis.Redis( - host=url.hostname, - port=url.port, - password=url.password, - ssl=True, - ssl_cert_reqs=None, - ) - return r + if redis_url == "redis://localhost:6379": + return redis.from_url(redis_url) + else: + url = urlparse(redis_url) + return redis.Redis( + host=url.hostname, + port=url.port, + password=url.password, + ssl=True, + ssl_cert_reqs=None, + ) diff --git a/application/web/web_main.py b/application/web/web_main.py index 75deab6dd..73964e815 100644 --- a/application/web/web_main.py +++ b/application/web/web_main.py @@ -36,7 +36,7 @@ from application.utils.spreadsheet import write_csv import oauthlib import google.auth.transport.requests -from application.utils.hash import make_array_hash +from application.utils.hash import make_array_hash, make_cache_key ITEMS_PER_PAGE = 20 @@ -226,26 +226,27 @@ def gap_analysis() -> Any: standards = request.args.getlist("standard") conn = redis.connect() standards_hash = make_array_hash(standards) - if conn.exists(standards_hash): - gap_analysis_results = conn.get(standards_hash) - if gap_analysis_results: - gap_analysis_dict = json.loads(gap_analysis_results) - if gap_analysis_dict.get("result"): - return jsonify({"result": gap_analysis_dict.get("result")}) 
- elif gap_analysis_dict.get("job_id"): - try: - res = job.Job.fetch( - id=gap_analysis_dict.get("job_id"), connection=conn - ) - except exceptions.NoSuchJobError as nje: - abort(404, "No such job") - if ( - res.get_status() != job.JobStatus.FAILED - and res.get_status() == job.JobStatus.STOPPED - and res.get_status() == job.JobStatus.CANCELED - ): - logger.info("gap analysis job id already exists, returning early") - return jsonify({"job_id": gap_analysis_dict.get("job_id")}) + result = database.get_gap_analysis_result(standards_hash) + if result: + gap_analysis_dict = flask_json.loads(result) + if gap_analysis_dict.get("result"): + return jsonify(gap_analysis_dict) + + gap_analysis_results = conn.get(standards_hash) + if gap_analysis_results: + gap_analysis_dict = json.loads(gap_analysis_results) + if gap_analysis_dict.get("job_id"): + try: + res = job.Job.fetch(id=gap_analysis_dict.get("job_id"), connection=conn) + except exceptions.NoSuchJobError as nje: + abort(404, "No such job") + if ( + res.get_status() != job.JobStatus.FAILED + and res.get_status() != job.JobStatus.STOPPED + and res.get_status() != job.JobStatus.CANCELED + ): + logger.info("gap analysis job id already exists, returning early") + return jsonify({"job_id": gap_analysis_dict.get("job_id")}) q = Queue(connection=conn) gap_analysis_job = q.enqueue_call( db.gap_analysis, @@ -266,15 +267,21 @@ def gap_analysis() -> Any: def gap_analysis_weak_links() -> Any: standards = request.args.getlist("standard") key = request.args.get("key") - conn = redis.connect() - standards_hash = make_array_hash(standards) - cache_key = standards_hash + "->" + key - if conn.exists(cache_key): - gap_analysis_results = conn.get(cache_key) - if gap_analysis_results: - gap_analysis_dict = json.loads(gap_analysis_results) - if gap_analysis_dict.get("result"): - return jsonify({"result": gap_analysis_dict.get("result")}) + cache_key = make_cache_key(standards=standards, key=key) + + database = db.Node_collection() + 
gap_analysis_results = database.get_gap_analysis_result(cache_key=cache_key) + if gap_analysis_results: + gap_analysis_dict = json.loads(gap_analysis_results) + if gap_analysis_dict.get("result"): + return jsonify({"result": gap_analysis_dict.get("result")}) + + # if conn.exists(cache_key): + # gap_analysis_results = conn.get(cache_key) + # if gap_analysis_results: + # gap_analysis_dict = json.loads(gap_analysis_results) + # if gap_analysis_dict.get("result"): + # return jsonify({"result": gap_analysis_dict.get("result")}) abort(404, "No such Cache") @@ -315,12 +322,14 @@ def fetch_job() -> Any: if conn.exists(standards_hash): logger.info("and hash is already in cache") - ga = conn.get(standards_hash) + # ga = conn.get(standards_hash) + database = db.Node_collection() + ga = database.get_gap_analysis_result(standards_hash) if ga: logger.info("and results in cache") - ga = json.loads(ga) + ga = flask_json.loads(ga) if ga.get("result"): - return jsonify({"result": ga.get("result")}) + return jsonify(ga) else: logger.error( "Finished job does not have a result object, this is a bug!" diff --git a/migrations/versions/5029c02946f9_cache_gap_analysis_results_in_db.py b/migrations/versions/5029c02946f9_cache_gap_analysis_results_in_db.py new file mode 100644 index 000000000..7eeafb7f1 --- /dev/null +++ b/migrations/versions/5029c02946f9_cache_gap_analysis_results_in_db.py @@ -0,0 +1,72 @@ +"""cache gap analysis results in db + +Revision ID: 5029c02946f9 +Revises: fffdc0652e27 +Create Date: 2023-10-24 09:02:37.535867 + +""" +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. +revision = "5029c02946f9" +down_revision = "fffdc0652e27" +branch_labels = None +depends_on = None + + +def upgrade(): + # ### commands auto generated by Alembic - please adjust! 
### + op.create_table( + "gap_analysis_results", + sa.Column("cache_key", sa.String(), nullable=False), + sa.Column("ga_object", sa.String(), nullable=True), + sa.PrimaryKeyConstraint("cache_key", name=op.f("pk_gap_analysis_results")), + sa.UniqueConstraint("cache_key", name="unique_cache_key_field"), + ) + with op.batch_alter_table("embeddings", schema=None) as batch_op: + batch_op.alter_column( + "embeddings_url", existing_type=sa.VARCHAR(), nullable=True + ) + batch_op.alter_column( + "embeddings_content", existing_type=sa.VARCHAR(), nullable=True + ) + batch_op.create_foreign_key( + batch_op.f("fk_embeddings_cre_id_cre"), + "cre", + ["cre_id"], + ["id"], + onupdate="CASCADE", + ondelete="CASCADE", + ) + batch_op.create_foreign_key( + batch_op.f("fk_embeddings_node_id_node"), + "node", + ["node_id"], + ["id"], + onupdate="CASCADE", + ondelete="CASCADE", + ) + + # ### end Alembic commands ### + + +def downgrade(): + # ### commands auto generated by Alembic - please adjust! ### + with op.batch_alter_table("embeddings", schema=None) as batch_op: + batch_op.drop_constraint( + batch_op.f("fk_embeddings_node_id_node"), type_="foreignkey" + ) + batch_op.drop_constraint( + batch_op.f("fk_embeddings_cre_id_cre"), type_="foreignkey" + ) + batch_op.alter_column( + "embeddings_content", existing_type=sa.VARCHAR(), nullable=False + ) + batch_op.alter_column( + "embeddings_url", existing_type=sa.VARCHAR(), nullable=False + ) + + op.drop_table("gap_analysis_results") + # ### end Alembic commands ### From a8a0330aac9c9fd46d2e0f25811c94ebaeb58ce7 Mon Sep 17 00:00:00 2001 From: John Harvey <10814889+john681611@users.noreply.github.com> Date: Sat, 28 Oct 2023 22:14:34 +0100 Subject: [PATCH 75/75] Get e2e tests reporting correctly (#395) * try pip cacheing * try yarn cache * try background service method * e2e test fixes * tmp increase timeouts for e2e tests due to new neo4j experiments * fix e2e tests * make e2e workflow match makefile * Revert actions simplification * increase 
timing * balance timings --------- Signed-off-by: Spyros Co-authored-by: Spyros Co-authored-by: Spyros --- .github/workflows/e2e.yml | 12 ++- Makefile | 3 +- .../src/pages/Search/components/SearchBar.tsx | 2 +- .../frontend/src/test/basic-e2etest.ts | 74 +++++++++---------- 4 files changed, 45 insertions(+), 46 deletions(-) diff --git a/.github/workflows/e2e.yml b/.github/workflows/e2e.yml index 4d167a4ed..e252f2987 100644 --- a/.github/workflows/e2e.yml +++ b/.github/workflows/e2e.yml @@ -4,17 +4,25 @@ jobs: build: name: Test-e2e runs-on: ubuntu-latest + timeout-minutes: 10 steps: - name: Check out code uses: actions/checkout@v2 - uses: actions/setup-python@v4 with: - python-version: '3.11.4' + python-version: '3.11.4' cache: 'pip' - uses: actions/setup-node@v3 with: cache: 'yarn' - name: Install python dependencies run: sudo apt-get update && sudo apt-get install -y python3-setuptools python3-pip chromium-browser libgbm1 && make install-deps + - name: Run App in background + run: | + yarn build + [ -d "./venv" ] && . ./venv/bin/activate + export FLASK_APP=./cre.py + export FLASK_CONFIG=development + FLASK_CONFIG=development flask run& - name: Test-e2e - run: make e2e \ No newline at end of file + run: yarn test:e2e diff --git a/Makefile b/Makefile index f78d2dce6..669bbb770 100644 --- a/Makefile +++ b/Makefile @@ -27,8 +27,7 @@ e2e: [ -d "./venv" ] && . 
./venv/bin/activate export FLASK_APP=$(CURDIR)/cre.py export FLASK_CONFIG=development - flask run& - + FLASK_CONFIG=development flask run& yarn test:e2e killall yarn killall flask diff --git a/application/frontend/src/pages/Search/components/SearchBar.tsx b/application/frontend/src/pages/Search/components/SearchBar.tsx index 42020f7c0..2989ed3be 100644 --- a/application/frontend/src/pages/Search/components/SearchBar.tsx +++ b/application/frontend/src/pages/Search/components/SearchBar.tsx @@ -45,7 +45,7 @@ export const SearchBar = () => { }); }} label={ - diff --git a/application/frontend/src/test/basic-e2etest.ts b/application/frontend/src/test/basic-e2etest.ts index 3d4c6a521..cae04e908 100644 --- a/application/frontend/src/test/basic-e2etest.ts +++ b/application/frontend/src/test/basic-e2etest.ts @@ -20,28 +20,28 @@ describe('App.js', () => { await page.goto('http://localhost:5000'); await page.waitForSelector('#SearchBar'); const text = await page.$eval('#SearchBar', (e) => e.textContent); - expect(text).toContain('Topic text'); + expect(text).toContain('Search'); }); it('can search for random strs', async () => { await page.goto('http://127.0.0.1:5000'); - await page.waitForSelector('#SearchBar', { timeout: 1000 }); - await page.waitForSelector('#SearchButton', { timeout: 1000 }); + await page.waitForSelector('#SearchBar', { timeout: 10000 }); + await page.waitForSelector('#SearchButton', { timeout: 10000 }); await page.type('#SearchBar > div > input', 'asdf'); - await page.click('#SearchButton > button'); - await page.waitForSelector('.content', { timeout: 1000 }); + await page.click('#SearchButton'); + await page.waitForSelector('.content', { timeout: 10000 }); const text = await page.$eval('.content', (e) => e.textContent); expect(text).toContain('Document could not be loaded'); }); it('can search for cryptography using the free text method and it returns both Nodes and CRES', async () => { await page.goto('http://127.0.0.1:5000'); - await 
page.waitForSelector('#SearchBar', { timeout: 1000 }); - await page.waitForSelector('#SearchButton', { timeout: 1000 }); + await page.waitForSelector('#SearchBar', { timeout: 10000 }); + await page.waitForSelector('#SearchButton', { timeout: 10000 }); await page.type('#SearchBar > div > input', 'crypto'); - await page.click('#SearchButton > button'); - await page.waitForSelector('.content', { timeout: 1000 }); - await page.waitForSelector('.standard-page__links-container', { timeout: 1000 }); + await page.click('#SearchButton'); + await page.waitForSelector('.content', { timeout: 10000 }); + await page.waitForSelector('.standard-page__links-container', { timeout: 10000 }); const text = await page.$eval('.content', (e) => e.textContent); expect(text).not.toContain('Document could not be loaded'); @@ -56,21 +56,15 @@ describe('App.js', () => { }); it('can search for a standard by name, section and the standard page works as expected', async () => { - await page.goto('http://127.0.0.1:5000'); - await page.waitForSelector('#SearchBar', { timeout: 1000 }); - await page.waitForSelector('#SearchButton', { timeout: 1000 }); - await page.type('#SearchBar > div > input', 'asvs'); - await page.click('#SearchBar > .ui > .dropdown'); - await page.click('div[path="/node/standard"]'); - await page.click('#SearchButton > button'); - await page.waitForSelector('.content', { timeout: 1000 }); - await page.waitForSelector('.standard-page__links-container', { timeout: 1000 }); + await page.goto('http://127.0.0.1:5000/node/standard/ASVS'); + await page.waitForSelector('.content', { timeout: 10000 }); + await page.waitForSelector('.standard-page__links-container', { timeout: 10000 }); const text = await page.$$('.content', (e) => e.textContent); expect(text).not.toContain('Document could not be loaded'); // title match const page_title = await page.$eval('.standard-page__heading', (e) => e.textContent); - expect(page_title).toContain('asvs'); + expect(page_title).toContain('ASVS'); // 
results const results = await page.$$('.standard-page__links-container'); @@ -79,16 +73,16 @@ describe('App.js', () => { // pagination const original_content = await page.content(); await page.click('a[type="pageItem"][value="2"]'); - await page.waitForSelector('.content', { timeout: 1000 }); + await page.waitForSelector('.content', { timeout: 10000 }); expect(await page.content()).not.toEqual(original_content); // link to section await page.click('.standard-page__links-container>.title>a'); - await page.waitForSelector('.content', { timeout: 1000 }); + await page.waitForSelector('.content', { timeout: 10000 }); const url = await page.url(); expect(url).toContain('section'); - const section = await page.$eval('.section-page > h5.standard-page__sub-heading', (e) => e.textContent); - expect(section).toContain('Section:'); + const section = await page.$eval('.standard-page > span:nth-child(2)', (e) => e.textContent); + expect(section).toContain('Reference:'); // show reference const hrefs = await page.evaluate(() => @@ -109,40 +103,38 @@ describe('App.js', () => { it('can search for a cre', async () => { await page.goto('http://127.0.0.1:5000'); - await page.waitForSelector('#SearchBar', { timeout: 1000 }); - await page.waitForSelector('#SearchButton', { timeout: 1000 }); + await page.waitForSelector('#SearchBar', { timeout: 10000 }); + await page.waitForSelector('#SearchButton', { timeout: 10000 }); await page.type('#SearchBar > div > input', '558-807'); - await page.click('#SearchBar > .ui > .dropdown'); - await page.click('div[path="/cre"]'); - await page.click('#SearchButton > button'); - await page.waitForSelector('.content', { timeout: 1000 }); - await page.waitForSelector('.cre-page__links-container', { timeout: 2000 }); + await page.click('#SearchButton'); + await page.waitForSelector('.content', { timeout: 10000 }); + await page.waitForSelector('.standard-page__links-container', { timeout: 10000 }); const text = await page.$$('.content', (e) => 
e.textContent); expect(text).not.toContain('Document could not be loaded'); // title match - const page_title = await page.$eval('.cre-page__heading', (e) => e.textContent); - expect(page_title).toContain('Mutually authenticate application and credential service provider'); + const entry_title = await page.$eval('div.title.document-node', (e) => e.textContent); + expect(entry_title).toContain('Mutually authenticate application and credential service provider'); // results - const results = await page.$$('.cre-page__links-container'); - expect(results.length).toBeGreaterThan(1); + const results = await page.$$('.standard-page__links-container'); + expect(results.length).toBe(1); // // nesting - await page.click('div.cre-page__links:nth-child(2) > div:nth-child(2)'); + await page.click('.dropdown'); const selector = - '.cre-page__links-container>.document-node>.document-node__link-type-container:nth-child(2)'; - await page.waitForSelector(selector, { timeout: 2000 }); + '.standard-page__links-container>.document-node>.document-node__link-type-container:nth-child(2)'; + await page.waitForSelector(selector, { timeout: 10000 }); const nested = await page.$$( - '.cre-page__links-container>.document-node>.document-node__link-type-container>div>.accordion' + '.standard-page__links-container>.document-node>.document-node__link-type-container>div>.accordion' ); expect(nested.length).toBeGreaterThan(1); }); it('can filter', async () => { await page.goto('http://127.0.0.1:5000/cre/558-807?applyFilters=true&filters=asvs'); - await page.waitForSelector('.cre-page__links-container', { timeout: 2000 }); + await page.waitForSelector('.cre-page__links-container', { timeout: 10000 }); // Get inner text const innerText = await page.evaluate( () => (document.querySelector('.cre-page__links-container') as HTMLElement)?.innerText @@ -153,7 +145,7 @@ describe('App.js', () => { // ensure case insensitive filtering await 
page.goto('http://127.0.0.1:5000/cre/558-807?applyFilters=true&filters=ASVS'); - await page.waitForSelector('.cre-page__links-container', { timeout: 2000 }); + await page.waitForSelector('.cre-page__links-container', { timeout: 10000 }); const intxt = await page.evaluate( () => (document.querySelector('.cre-page__links-container') as HTMLElement)?.innerText );