From bb3e5154c7315da1918a220ae981e274602fbe0d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Oriol=20L=C3=B3pez-Doriga?= Date: Wed, 3 Apr 2024 20:03:01 +0200 Subject: [PATCH] synonyms added --- beacon/db/extract_filtering_terms.py | 37 +++---------- beacon/db/filters.py | 80 +++++++++++++++++++++------- 2 files changed, 68 insertions(+), 49 deletions(-) diff --git a/beacon/db/extract_filtering_terms.py b/beacon/db/extract_filtering_terms.py index 240ac21c..d18055d6 100644 --- a/beacon/db/extract_filtering_terms.py +++ b/beacon/db/extract_filtering_terms.py @@ -238,7 +238,6 @@ def find_ontology_terms_used(collection_name: str) -> List[Dict]: for r in tqdm(xs, total=num_total): matches = ONTOLOGY_REGEX.findall(str(r)) icd_matches = ICD_REGEX.findall(str(r)) - print(icd_matches) for ontology_id, term_id in matches: term = ':'.join([ontology_id, term_id]) if term not in terms_ids: @@ -269,12 +268,9 @@ def get_filtering_object(terms_ids: list, collection_name: str): try: field = field_dict['field'] label = field_dict['label'] - if label == 'Weight': - ontology_label = 'Weight in Kilograms' - elif label == 'Height-standing': - ontology_label = 'Height-standing in Centimeters' - elif label == 'BMI': - ontology_label = 'BMI in Kilograms per Square Meter' + value_id=None + if 'measurements.assayCode' in field: + value_id = label else: ontology_label = label if field is not None: @@ -304,29 +300,10 @@ def get_filtering_object(terms_ids: list, collection_name: str): #'count': get_ontology_term_count(collection_name, onto), 'scope': [collection_name[0:-1]] }) - if label == 'Weight': + if value_id is not None: terms.append({ 'type': 'alphanumeric', - 'id': label, - 'label': ontology_label, - # TODO: Use conf.py -> beaconGranularity to not disclouse counts in the filtering terms - #'count': get_ontology_term_count(collection_name, onto), - 'scope': [collection_name[0:-1]] - }) - if label == 'BMI': - terms.append({ - 'type': 'alphanumeric', - 'id': label, - 'label': ontology_label, - # TODO: Use conf.py -> beaconGranularity to not disclouse counts in the filtering terms - #'count': get_ontology_term_count(collection_name, onto), - 'scope': [collection_name[0:-1]] - }) - if label == 'Height-standing': - terms.append({ - 'type': 'alphanumeric', - 'id': label, - 'label': ontology_label, + 'id': value_id, # TODO: Use conf.py -> beaconGranularity to not disclouse counts in the filtering terms #'count': get_ontology_term_count(collection_name, onto), 'scope': [collection_name[0:-1]] @@ -384,12 +361,12 @@ def merge_terms(): array_of_ids.append(new_id) else: repeated_ids.append(new_id) - print("repeated_ids are {}".format(repeated_ids)) + #print("repeated_ids are {}".format(repeated_ids)) for repeated_id in repeated_ids: repeated_terms = client.beacon.filtering_terms.find({"id": repeated_id, "type": "ontology"}) array_of_scopes=[] for repeated_term in repeated_terms: - print(repeated_term) + #print(repeated_term) id=repeated_term["id"] label=repeated_term["label"] if repeated_term['scope'] != []: diff --git a/beacon/db/filters.py b/beacon/db/filters.py index 5d4382a6..64b7f304 100644 --- a/beacon/db/filters.py +++ b/beacon/db/filters.py @@ -388,6 +388,7 @@ def apply_filters(query: dict, filters: List[dict], collection: str, query_param def apply_ontology_filter(query: dict, filter: OntologyFilter, collection: str, request_parameters: dict) -> dict: + final_term_list=[] query_synonyms={} query_synonyms['id']=filter.id synonyms=get_documents( @@ -403,6 +404,7 @@ def apply_ontology_filter(query: dict, filter: OntologyFilter, collection: str, synonym_id=None LOG.debug(synonym_id) if synonym_id is not None: + final_term_list.append(filter.id) filter.id=synonym_id @@ -470,20 +472,21 @@ def apply_ontology_filter(query: dict, filter: OntologyFilter, collection: str, query_terms = doc2['id'] query_terms = query_terms.split(':') query_term = query_terms[0] + '.id' - query_id={} - query['$or']=[] - for simil in final_term_list: + if final_term_list !=[]: + new_query={} query_id={} - query_id[query_term]=simil - query['$or'].append(query_id) - LOG.debug(query) + new_query['$or']=[] + for simil in final_term_list: + query_id={} + query_id[query_term]=simil + new_query['$or'].append(query_id) + query = new_query else: pass # Apply descendant terms if filter.include_descendant_terms == True: - final_term_list=[] final_term_list.append(filter.id) is_filter_id_required = False ontology=filter.id.replace("\n","") @@ -546,13 +549,17 @@ def apply_ontology_filter(query: dict, filter: OntologyFilter, collection: str, query_terms = doc2['id'] query_terms = query_terms.split(':') query_term = query_terms[0] + '.id' - - query_id={} - query['$or']=[] - for simil in final_term_list: + + if final_term_list !=[]: + LOG.debug(final_term_list) + new_query={} query_id={} - query_id[query_term]=simil - query['$or'].append(query_id) + new_query['$or']=[] + for simil in final_term_list: + query_id={} + query_id[query_term]=simil + new_query['$or'].append(query_id) + query = new_query LOG.debug(query) query=cross_query(query, scope, collection, request_parameters) @@ -596,6 +603,16 @@ def apply_ontology_filter(query: dict, filter: OntologyFilter, collection: str, query_terms = query_terms.split(':') query_term = query_terms[0] + '.id' query[query_term]=filter.id + if final_term_list !=[]: + new_query={} + query_id={} + new_query['$or']=[] + for simil in final_term_list: + query_id={} + query_id[query_term]=simil + new_query['$or'].append(query_id) + new_query['$or'].append(query) + query = new_query LOG.debug(query) @@ -770,16 +787,41 @@ def apply_alphanumeric_filter(query: dict, filter: AlphanumericFilter, collectio query_id[query_term]=filter.value query['$nor'].append(query_id) else: - query['measurementValue.value'] = { formatted_operator: float(formatted_value) } - if "LOINC" in filter.id: - query['assayCode.id']=filter.id - else: - query['assayCode.label']=filter.id + query_filtering={} + query_filtering['$and']=[] + dict_type={} + dict_id={} + dict_regex={} + dict_regex['$regex']=filter.id + dict_type['type']='custom' + dict_id['id']=dict_regex + query_filtering['$and'].append(dict_type) + query_filtering['$and'].append(dict_id) + docs = get_documents( + client.beacon.filtering_terms, + query_filtering, + 0, + 1 + ) + for doc in docs: + LOG.debug(doc) + prefield_splitted = doc['id'].split(':') + prefield = prefield_splitted[0] + field = prefield.replace('assayCode', 'measurementValue.value') + + assayfield = 'assayCode' + '.label' + fieldsplitted = field.split('.') + measuresfield=fieldsplitted[0] + + field = field.replace(measuresfield+'.', '') + + query[field] = { formatted_operator: float(formatted_value) } + query[assayfield]=filter.id #LOG.debug(query) dict_elemmatch={} dict_elemmatch['$elemMatch']=query dict_measures={} - dict_measures['measures']=dict_elemmatch + dict_measures[measuresfield]=dict_elemmatch query = dict_measures LOG.debug(collection) query=cross_query(query, scope, collection, {})