Skip to content

Commit

Permalink
all relationship deprecations now happen in s2a muchdogesec/arango_cv…
Browse files Browse the repository at this point in the history
  • Loading branch information
fqrious committed Dec 10, 2024
1 parent a76b94a commit 368fdbd
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 5 deletions.
35 changes: 32 additions & 3 deletions stix2arango/services/arangodb_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,12 +177,13 @@ def insert_relationships_chunked(self, relationships: list[dict[str, Any]], id_t
return self.insert_several_objects_chunked(relationships, collection_name, chunk_size=chunk_size)

def update_is_latest_several(self, object_ids, collection_name):
#returns newly deprecated _ids
objects_in = {k: True for k in object_ids}
query = """
LET matched_objects = ( // collect all the modified into a single list of {id: ?, modified: ?, _record_modified: ?, _key: ?}
FOR object in @@collection
FILTER @objects_in[object.id] != NULL
RETURN KEEP(object, 'id', 'modified', '_record_modified', '_key')
RETURN KEEP(object, 'id', 'modified', '_record_modified', '_key', '_id')
)
LET modified_map = MERGE( // get max modified by ID
Expand All @@ -200,6 +201,8 @@ def update_is_latest_several(self, object_ids, collection_name):
FOR doc IN matched_objects
LET _is_latest = modified_map[doc.id] == doc._key
UPDATE {_key: doc._key, _is_latest} IN @@collection
FILTER _is_latest != doc._is_latest AND _is_latest == FALSE
RETURN doc._id
"""
return self.execute_raw_query(query, bind_vars={
"@collection": collection_name,
Expand All @@ -209,12 +212,38 @@ def update_is_latest_several(self, object_ids, collection_name):
def update_is_latest_several_chunked(self, object_ids, collection_name, edge_collection=None, chunk_size=500):
    """Refresh ``_is_latest`` for ``object_ids`` in chunks and deprecate dependent edges.

    For every chunk of ``object_ids`` this calls ``update_is_latest_several``
    on ``collection_name`` and hands the ``_id``s it returns to
    ``deprecate_relationships`` so edges in ``edge_collection`` that start
    from those documents are flagged as no longer latest.

    Args:
        object_ids: ids of the newly inserted objects to re-evaluate.
        collection_name: collection whose documents get ``_is_latest`` updated.
        edge_collection: edge collection to propagate deprecations into; when
            falsy, no embedded-ref update is performed.
        chunk_size: number of ids handed to ``utils.chunked`` per chunk.

    Returns:
        The ``_id``s reported as newly deprecated, accumulated over *all*
        chunks (previously only the last chunk's ids survived).
    """
    logging.info(f"Updating _is_latest for {len(object_ids)} newly inserted items")
    deprecated_key_ids = []  # newly deprecated _ids, accumulated across every chunk

    # The bar is driven manually in units of object ids. Iterating the tqdm
    # object itself would additionally advance it by one per chunk and push
    # the counter past ``total``.
    with tqdm(total=len(object_ids)) as progress_bar:
        for chunk in utils.chunked(object_ids, chunk_size):
            chunk_deprecated_ids = self.update_is_latest_several(chunk, collection_name)
            # Integer halves keep the counter whole and still sum to len(chunk).
            first_half = len(chunk) // 2
            progress_bar.update(first_half)
            self.deprecate_relationships(chunk_deprecated_ids, edge_collection)
            progress_bar.update(len(chunk) - first_half)
            deprecated_key_ids.extend(chunk_deprecated_ids)

    if edge_collection:
        self.update_is_latest_for_embedded_refs(object_ids, edge_collection)
    return deprecated_key_ids

def deprecate_relationships(self, deprecated_key_ids: list, edge_collection: str):
    """Flag edges that start from deprecated documents as no longer latest.

    Runs an AQL update over ``edge_collection`` that selects edges whose
    ``_from`` is in the current id list and whose ``_is_latest`` is truthy,
    sets ``_is_latest`` to FALSE on them and returns their ``_id``s. Those
    ``_id``s become the id list for the next round, and the rounds repeat
    until a round returns nothing.

    Args:
        deprecated_key_ids: ``_id``s of the documents that were just deprecated.
        edge_collection: name of the edge collection to update; when falsy no
            query is executed.

    Returns:
        Total number of edges deprecated over all rounds.
    """
    aql = """
FOR doc IN @@collection
FILTER doc._from IN @deprecated_key_ids AND doc._is_latest
UPDATE {_key: doc._key, _is_latest: FALSE} IN @@collection
// FILTER doc._is_ref != TRUE // no need for further propagation for embedded relationships
RETURN doc._id
"""
    logging.info("deprecating relationships for %d objects", len(deprecated_key_ids))

    total_deprecated = 0
    frontier = deprecated_key_ids  # ids whose outgoing edges still need deprecating
    while True:
        if not frontier or not edge_collection:
            break
        bind_vars = {
            "@collection": edge_collection,
            "deprecated_key_ids": frontier,
        }
        # Edges deprecated in this round seed the next one.
        frontier = self.execute_raw_query(aql, bind_vars=bind_vars)
        total_deprecated += len(frontier)
        logging.info("deprecated %d relationships", total_deprecated)

    return total_deprecated


def _update_is_latest_for_embedded_refs(self, object_ids, edge_collection):
query = """
Expand Down
4 changes: 2 additions & 2 deletions stix2arango/stix2arango/stix2arango.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ def process_bundle_into_graph(self, filename: str, data, notes=None):

module_logger.info(f"Inserting objects into database. Total objects: {len(objects)}")
inserted_object_ids, existing_objects = self.arango.insert_several_objects_chunked(objects, self.core_collection_vertex)
self.arango.update_is_latest_several_chunked(inserted_object_ids, self.core_collection_vertex)
self.arango.update_is_latest_several_chunked(inserted_object_ids, self.core_collection_vertex, self.core_collection_edge)

self.update_object_key_mapping(objects, existing_objects)
return inserted_object_ids, existing_objects
Expand Down Expand Up @@ -163,7 +163,7 @@ def map_embedded_relationships(self, data, inserted_object_ids):
module_logger.info(f"Inserting embedded relationship into database. Total objects: {len(objects)}")

inserted_object_ids, existing_objects = self.arango.insert_relationships_chunked(objects, self.object_key_mapping, self.core_collection_edge)
self.arango.update_is_latest_several_chunked(inserted_object_ids, self.core_collection_edge)
self.arango.update_is_latest_several_chunked(inserted_object_ids, self.core_collection_edge, self.core_collection_edge)
return inserted_object_ids, existing_objects

def import_default_objects(self):
Expand Down

0 comments on commit 368fdbd

Please sign in to comment.