Skip to content

Commit

Permalink
move cycle detection to the graph 'add_edge' method to fix databases …
Browse files Browse the repository at this point in the history
…with existing cycles
  • Loading branch information
northdpole committed Sep 25, 2024
1 parent fa42c69 commit 821de88
Show file tree
Hide file tree
Showing 2 changed files with 65 additions and 49 deletions.
52 changes: 23 additions & 29 deletions application/database/db.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from sqlalchemy import CheckConstraint
import networkx as nx
import uuid
import neo4j
Expand Down Expand Up @@ -129,6 +130,7 @@ class InternalLinks(BaseModel): # type: ignore
cre,
name="uq_pair",
),
sqla.CheckConstraint("type != 'PartOf'", name="No 'PartOf' links"),
)


Expand Down Expand Up @@ -683,8 +685,10 @@ def with_graph(self) -> "Node_collection":
graph_singleton = inmemory_graph.Singleton_Graph_Storage.instance()
self.graph.with_graph(
graph=graph_singleton,
graph_data=self.__get_all_nodes_and_cres(),
graph_data=self.__get_all_nodes_and_cres(cres_only=True),
)
logger.info("Successfully loaded CRE graph in memory")

return self

def __get_external_links(self) -> List[Tuple[CRE, Node, str]]:
Expand Down Expand Up @@ -734,13 +738,16 @@ def __get_unlinked_cres(self) -> List[CRE]:
)
return cres

def __get_all_nodes_and_cres(self) -> List[cre_defs.Document]:
def __get_all_nodes_and_cres(
self, cres_only: bool = False
) -> List[cre_defs.Document]:
result = []
nodes = []
cres = []
node_ids = self.session.query(Node.id).all()
for nid in node_ids:
result.extend(self.get_nodes(db_id=nid[0]))
if not cres_only:
node_ids = self.session.query(Node.id).all()
for nid in node_ids:
result.extend(self.get_nodes(db_id=nid[0]))

cre_ids = self.session.query(CRE.id).all()
for cid in cre_ids:
Expand Down Expand Up @@ -1436,7 +1443,7 @@ def add_cre(self, cre: cre_defs.CRE) -> CRE:
entry.tags = ",".join(cre.tags)
return entry
else:
logger.info("did not know of %s ,adding" % cre.name)
logger.info("did not know of cre %s ,adding" % cre.name)
entry = CRE(
description=cre.description,
name=cre.name,
Expand All @@ -1452,6 +1459,9 @@ def add_cre(self, cre: cre_defs.CRE) -> CRE:
def add_node(
self, node: cre_defs.Node, comparison_skip_attributes: List = ["link"]
) -> Optional[Node]:
if not node:
raise ValueError(f"Node is None")
return None
dbnode = dbNodeFromNode(node)
if not dbnode:
logger.warning(f"{node} could not be transformed to a DB object")
Expand All @@ -1472,7 +1482,7 @@ def add_node(
return entry
else:
logger.info(
f"did not know of {dbnode.name}:{dbnode.section}:{dbnode.section_id} ,adding"
f"did not know of node {dbnode.name}:{dbnode.section}:{dbnode.section_id} ,adding"
)
self.session.add(dbnode)
self.session.commit()
Expand Down Expand Up @@ -1590,32 +1600,16 @@ def add_internal_link(
higher_cre = CREfromDB(higher)
lower_cre = CREfromDB(lower)
link_to = cre_defs.Link(document=lower_cre, ltype=ltype)
cycle = self.graph.introduces_cycle(doc_from=higher_cre, link_to=link_to)

if not cycle:
try:
self.graph.add_link(doc_from=higher_cre, link_to=link_to)
self.session.add(
InternalLinks(type=ltype.value, cre=lower.id, group=higher.id)
)
self.session.commit()
if self.graph:
self.graph.add_link(doc_from=higher_cre, link_to=link_to)
else:
for item in cycle:
from_id = item[0].replace("CRE: ", "")
to_id = item[1].replace("CRE: ", "")
from_cre = self.session.query(CRE).filter(lower.id == from_id).first()
to_cre = self.session.query(CRE).filter(lower.id == to_id).first()
if from_cre and to_cre:
item[0].replace(from_id, from_cre.name)
item[1].replace(to_id, to_cre.name)

logger.warning(
f"A link between CREs {higher.external_id}-{higher.name} and"
f" {lower.external_id}-{lower.name} "
f"would introduce cycle {cycle}, skipping"
)
return None
return cre_defs.Link(document=lower, ltype=ltype)
return cre_defs.Link(document=lower, ltype=ltype)
except inmemory_graph.CycleDetectedError as cde:
# cycle detected, do nothing
pass

def add_link(
self,
Expand Down
62 changes: 42 additions & 20 deletions application/database/inmemory_graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,11 @@
logger.setLevel(logging.INFO)


class CycleDetectedError(Exception):
def __init__(self, message):
super().__init__(message)


class Singleton_Graph_Storage(nx.DiGraph):
__instance: "Singleton_Graph_Storage" = None

Expand All @@ -30,10 +35,10 @@ class CRE_Graph:
def get_raw_graph(self):
return self.__graph

def with_graph(cls, graph: nx.Graph, graph_data: List[defs.Document]):
cls.__graph = graph
def with_graph(self, graph: nx.Graph, graph_data: List[defs.Document]):
self.__graph = graph
if not len(graph.edges):
cls.__load_cre_graph(graph_data)
self.__load_cre_graph(graph_data)

def introduces_cycle(self, doc_from: defs.Document, link_to: defs.Link):
try:
Expand Down Expand Up @@ -62,7 +67,7 @@ def introduces_cycle(self, doc_from: defs.Document, link_to: defs.Link):

def has_cycle(self):
try:
ex = nx.find_cycle(self.__graph)
ex = nx.find_cycle(self.__graph, orientation="original")
return ex
except nx.exception.NetworkXNoCycle:
return None
Expand Down Expand Up @@ -132,30 +137,40 @@ def get_path(self, start: str, end: str) -> List[Tuple[str, str]]:
except nx.NetworkXNoPath:
return []

def add_cre(cls, cre: defs.CRE):
def add_cre(self, cre: defs.CRE):
if not isinstance(cre, defs.CRE):
raise ValueError(
f"inmemory graph add_cre takes only cre objects, instead got {type(cre)}"
)
graph_cre = f"{defs.Credoctypes.CRE.value}: {cre.id}"
if cre and graph_cre not in cls.__graph.nodes():
cls.__graph.add_node(graph_cre, internal_id=cre.id)
if cre and graph_cre not in self.__graph.nodes():
self.__graph.add_node(graph_cre, internal_id=cre.id)

def add_dbnode(cls, dbnode: defs.Node):
def add_dbnode(self, dbnode: defs.Node):
graph_node = "Node: " + str(dbnode.id)

if dbnode and graph_node not in cls.__graph.nodes():
cls.__graph.add_node(graph_node, internal_id=dbnode.id)
else:
logger.error("Called with dbnode being none")
if dbnode and graph_node not in self.__graph.nodes():
self.__graph.add_node(graph_node, internal_id=dbnode.id)

def add_link(self, doc_from: defs.Document, link_to: defs.Link):
logger.debug(
f"adding link {doc_from.id}, {link_to.document.id} ltype: {link_to.ltype}"
)
if (
doc_from.doctype == defs.Credoctypes.CRE
and link_to.document.doctype == defs.Credoctypes.CRE
):
cycle = self.introduces_cycle(doc_from=doc_from, link_to=link_to)
if cycle:
warn = f"A link between CREs {doc_from.id}-{doc_from.name} and {link_to.document.id}-{link_to.document.name} would introduce cycle {cycle}, skipping"
logger.warning(warn)
raise CycleDetectedError(warn)

self.__graph = self.__add_graph_edge(
doc_from=doc_from, link_to=link_to, graph=self.__graph
)

def __add_graph_edge(
cls,
self,
doc_from: defs.Document,
link_to: defs.Link,
graph: nx.DiGraph,
Expand Down Expand Up @@ -218,15 +233,22 @@ def __add_graph_edge(
)
return graph

def __load_cre_graph(cls, documents: List[defs.Document]):
def __load_cre_graph(self, documents: List[defs.Document]):
for doc in documents:
if not doc:
continue
if doc.doctype == defs.Credoctypes.CRE:
cls.add_cre(cre=doc)
self.add_cre(cre=doc)
else:
cls.add_dbnode(dbnode=doc)
self.add_dbnode(dbnode=doc)
for link in doc.links:
if not link.document:
logger.error(f"doc {doc}, has a link with a document that's None")
if link.document.doctype == defs.Credoctypes.CRE:
cls.add_cre(cre=link.document)
self.add_cre(cre=link.document)
else:
cls.add_dbnode(dbnode=link.document)
cls.__add_graph_edge(doc_from=doc, link_to=link, graph=cls.__graph)
self.add_dbnode(dbnode=link.document)
try:
self.add_link(doc_from=doc, link_to=link)
except CycleDetectedError as cde:
pass

0 comments on commit 821de88

Please sign in to comment.