-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathutils.py
64 lines (60 loc) · 1.91 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
import logging
import re
# Fence that must surround the candidate-gene section of the template.
_TEMPLATE_DELIMITER = "~~~"

_logger = logging.getLogger(__name__)


def _is_delimiter(section):
    """Return True when *section* is exactly the ``~~~`` fence."""
    return section == _TEMPLATE_DELIMITER


def _mentions_question(section):
    """Return True when *section* contains the word ``What``."""
    return "What" in section


# Ordered (name, predicate) pairs: entry i validates the i-th non-empty
# "|"-separated section. The regexes are applied with ``match`` (prefix
# match), so trailing text after the recognised prefix is tolerated, e.g.
# multi-word phenotypes or a long gene list.
_TEMPLATE_CHECKS = (
    ("age", re.compile(r"Age:\s+\d+\s+(months|years)\s+old").match),
    ("sex", re.compile(r"Sex:\s+(Male|Female)").match),
    ("ethnicity", re.compile(r"Ethnicity:\s+[A-Za-z]+").match),
    ("race", re.compile(r"Race:\s+[A-Za-z]+").match),
    ("phenotypes", re.compile(r"Phenotypes:\s+([A-Za-z]+(,\s*)?)+").match),
    ("delimiter before genes", _is_delimiter),
    (
        "genes",
        re.compile(
            r"Candidate Genes from genome/exome sequencing tests:\s+([A-Za-z0-9])"
        ).match,
    ),
    ("delimiter after genes", _is_delimiter),
    ("question", _mentions_question),
)


def check_template(text):
    """Return True if *text* follows the disease-diagnosis question template.

    The text is split on ``|``; each piece is stripped and empty pieces are
    dropped. The first nine remaining sections must then be, in order::

        Age: <n> months|years old
        Sex: Male|Female
        Ethnicity: <letters>
        Race: <letters>
        Phenotypes: <letters>[, <letters> ...]
        ~~~
        Candidate Genes from genome/exome sequencing tests: <alphanumeric...>
        ~~~
        <any text containing "What">

    Sections beyond the ninth are ignored.

    NOTE(review): the template is usually written one field per line;
    presumably the caller joins those lines with ``|`` before calling this
    function -- confirm against the caller.

    Args:
        text: The raw user input to validate.

    Returns:
        True when every section is present and valid; False otherwise,
        including when *text* is not a string or has too few sections.
    """
    # A non-string (e.g. None from an empty form field) cannot match the
    # template; report that as invalid instead of raising AttributeError.
    if not isinstance(text, str):
        return False

    stripped = (piece.strip() for piece in text.split("|"))
    sections = [piece for piece in stripped if piece]

    for index, (name, is_valid) in enumerate(_TEMPLATE_CHECKS):
        if index >= len(sections):
            _logger.debug("Template section %d (%s) is missing", index, name)
            return False
        if not is_valid(sections[index]):
            _logger.debug("Template section %d (%s) is malformed", index, name)
            return False
    return True
def separate_texts(text, delimiter="~~~"):
    """Split *text* into the pieces found between occurrences of *delimiter*.

    Pieces are returned exactly as they appear (no whitespace stripping). A
    text that does not contain the delimiter yields a one-element list.

    Args:
        text: The string to split.
        delimiter: The separator to split on. Defaults to ``"~~~"``, the
            fence used by the diagnosis template.

    Returns:
        A list of substrings of *text*, in order.

    Raises:
        ValueError: If *delimiter* is an empty string (raised by
            ``str.split``).
    """
    return text.split(delimiter)