Skip to content

Commit

Permalink
synonyms added
Browse files Browse the repository at this point in the history
  • Loading branch information
costero-e committed Apr 3, 2024
1 parent 36b320a commit bb3e515
Show file tree
Hide file tree
Showing 2 changed files with 68 additions and 49 deletions.
37 changes: 7 additions & 30 deletions beacon/db/extract_filtering_terms.py
Original file line number Diff line number Diff line change
Expand Up @@ -238,7 +238,6 @@ def find_ontology_terms_used(collection_name: str) -> List[Dict]:
for r in tqdm(xs, total=num_total):
matches = ONTOLOGY_REGEX.findall(str(r))
icd_matches = ICD_REGEX.findall(str(r))
print(icd_matches)
for ontology_id, term_id in matches:
term = ':'.join([ontology_id, term_id])
if term not in terms_ids:
Expand Down Expand Up @@ -269,12 +268,9 @@ def get_filtering_object(terms_ids: list, collection_name: str):
try:
field = field_dict['field']
label = field_dict['label']
if label == 'Weight':
ontology_label = 'Weight in Kilograms'
elif label == 'Height-standing':
ontology_label = 'Height-standing in Centimeters'
elif label == 'BMI':
ontology_label = 'BMI in Kilograms per Square Meter'
value_id=None
if 'measurements.assayCode' in field:
value_id = label
else:
ontology_label = label
if field is not None:
Expand Down Expand Up @@ -304,29 +300,10 @@ def get_filtering_object(terms_ids: list, collection_name: str):
#'count': get_ontology_term_count(collection_name, onto),
'scope': [collection_name[0:-1]]
})
if label == 'Weight':
if value_id is not None:
terms.append({
'type': 'alphanumeric',
'id': label,
'label': ontology_label,
# TODO: Use conf.py -> beaconGranularity to avoid disclosing counts in the filtering terms
#'count': get_ontology_term_count(collection_name, onto),
'scope': [collection_name[0:-1]]
})
if label == 'BMI':
terms.append({
'type': 'alphanumeric',
'id': label,
'label': ontology_label,
# TODO: Use conf.py -> beaconGranularity to avoid disclosing counts in the filtering terms
#'count': get_ontology_term_count(collection_name, onto),
'scope': [collection_name[0:-1]]
})
if label == 'Height-standing':
terms.append({
'type': 'alphanumeric',
'id': label,
'label': ontology_label,
'id': value_id,
# TODO: Use conf.py -> beaconGranularity to avoid disclosing counts in the filtering terms
#'count': get_ontology_term_count(collection_name, onto),
'scope': [collection_name[0:-1]]
Expand Down Expand Up @@ -384,12 +361,12 @@ def merge_terms():
array_of_ids.append(new_id)
else:
repeated_ids.append(new_id)
print("repeated_ids are {}".format(repeated_ids))
#print("repeated_ids are {}".format(repeated_ids))
for repeated_id in repeated_ids:
repeated_terms = client.beacon.filtering_terms.find({"id": repeated_id, "type": "ontology"})
array_of_scopes=[]
for repeated_term in repeated_terms:
print(repeated_term)
#print(repeated_term)
id=repeated_term["id"]
label=repeated_term["label"]
if repeated_term['scope'] != []:
Expand Down
80 changes: 61 additions & 19 deletions beacon/db/filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -388,6 +388,7 @@ def apply_filters(query: dict, filters: List[dict], collection: str, query_param


def apply_ontology_filter(query: dict, filter: OntologyFilter, collection: str, request_parameters: dict) -> dict:
final_term_list=[]
query_synonyms={}
query_synonyms['id']=filter.id
synonyms=get_documents(
Expand All @@ -403,6 +404,7 @@ def apply_ontology_filter(query: dict, filter: OntologyFilter, collection: str,
synonym_id=None
LOG.debug(synonym_id)
if synonym_id is not None:
final_term_list.append(filter.id)
filter.id=synonym_id


Expand Down Expand Up @@ -470,20 +472,21 @@ def apply_ontology_filter(query: dict, filter: OntologyFilter, collection: str,
query_terms = doc2['id']
query_terms = query_terms.split(':')
query_term = query_terms[0] + '.id'
query_id={}
query['$or']=[]
for simil in final_term_list:
if final_term_list !=[]:
new_query={}
query_id={}
query_id[query_term]=simil
query['$or'].append(query_id)
LOG.debug(query)
new_query['$or']=[]
for simil in final_term_list:
query_id={}
query_id[query_term]=simil
new_query['$or'].append(query_id)
query = new_query
else:
pass


# Apply descendant terms
if filter.include_descendant_terms == True:
final_term_list=[]
final_term_list.append(filter.id)
is_filter_id_required = False
ontology=filter.id.replace("\n","")
Expand Down Expand Up @@ -546,13 +549,17 @@ def apply_ontology_filter(query: dict, filter: OntologyFilter, collection: str,
query_terms = doc2['id']
query_terms = query_terms.split(':')
query_term = query_terms[0] + '.id'

query_id={}
query['$or']=[]
for simil in final_term_list:
if final_term_list !=[]:
LOG.debug(final_term_list)
new_query={}
query_id={}
query_id[query_term]=simil
query['$or'].append(query_id)
new_query['$or']=[]
for simil in final_term_list:
query_id={}
query_id[query_term]=simil
new_query['$or'].append(query_id)
query = new_query

LOG.debug(query)
query=cross_query(query, scope, collection, request_parameters)
Expand Down Expand Up @@ -596,6 +603,16 @@ def apply_ontology_filter(query: dict, filter: OntologyFilter, collection: str,
query_terms = query_terms.split(':')
query_term = query_terms[0] + '.id'
query[query_term]=filter.id
if final_term_list !=[]:
new_query={}
query_id={}
new_query['$or']=[]
for simil in final_term_list:
query_id={}
query_id[query_term]=simil
new_query['$or'].append(query_id)
new_query['$or'].append(query)
query = new_query
LOG.debug(query)


Expand Down Expand Up @@ -770,16 +787,41 @@ def apply_alphanumeric_filter(query: dict, filter: AlphanumericFilter, collectio
query_id[query_term]=filter.value
query['$nor'].append(query_id)
else:
query['measurementValue.value'] = { formatted_operator: float(formatted_value) }
if "LOINC" in filter.id:
query['assayCode.id']=filter.id
else:
query['assayCode.label']=filter.id
query_filtering={}
query_filtering['$and']=[]
dict_type={}
dict_id={}
dict_regex={}
dict_regex['$regex']=filter.id
dict_type['type']='custom'
dict_id['id']=dict_regex
query_filtering['$and'].append(dict_type)
query_filtering['$and'].append(dict_id)
docs = get_documents(
client.beacon.filtering_terms,
query_filtering,
0,
1
)
for doc in docs:
LOG.debug(doc)
prefield_splitted = doc['id'].split(':')
prefield = prefield_splitted[0]
field = prefield.replace('assayCode', 'measurementValue.value')

assayfield = 'assayCode' + '.label'
fieldsplitted = field.split('.')
measuresfield=fieldsplitted[0]

field = field.replace(measuresfield+'.', '')

query[field] = { formatted_operator: float(formatted_value) }
query[assayfield]=filter.id
#LOG.debug(query)
dict_elemmatch={}
dict_elemmatch['$elemMatch']=query
dict_measures={}
dict_measures['measures']=dict_elemmatch
dict_measures[measuresfield]=dict_elemmatch
query = dict_measures
LOG.debug(collection)
query=cross_query(query, scope, collection, {})
Expand Down

0 comments on commit bb3e515

Please sign in to comment.