-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathprocess.py
executable file
·58 lines (47 loc) · 1.74 KB
/
process.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
#!/usr/bin/env python
import sys
import json
class ProcessingException(Exception):
    """Error raised when a question cannot be processed.

    Attributes:
        question: The question that was being processed when the error
            occurred.
        message: Human-readable description of the problem.
    """

    def __init__(self, question, message):
        # Only the message is forwarded to Exception, so ``args`` is
        # ``(message,)``; the question is kept as a separate attribute.
        super().__init__(message)
        self.question = question
        self.message = message

    def __str__(self):
        # Include the question so the error is self-explanatory when printed.
        return f"Question: {self.question}: {self.message}"
def find_index(transcript: str, question: str):
    """Locate a question in a transcript via its longest usable prefix.

    Prefixes of ``question`` are tried from the full text down to a single
    character. The first prefix that ``str.count`` reports between one and
    three times in ``transcript`` is selected.

    Args:
        transcript: Text to search in.
        question: Text whose prefixes are searched for.

    Returns:
        The index of the last occurrence (``str.rfind``) of the selected
        prefix in ``transcript``.

    Raises:
        ProcessingException: If no prefix occurs 1-3 times. This includes an
            empty ``question``, for which there are no prefixes to try.
    """
    length = len(question)
    while length > 0:
        candidate = question[:length]
        # str.count counts non-overlapping occurrences.
        if transcript.count(candidate) in (1, 2, 3):
            return transcript.rfind(candidate)
        length -= 1
    raise ProcessingException(question, "No occurence of 1-3 times")
def process_transcript(transcript: str, questions: list[str]):
    """Find where each question occurs in the transcript, ordered by position.

    Matching is case-insensitive: the transcript and every question are
    lower-cased before being passed to ``find_index``.

    Args:
        transcript: Full transcript text.
        questions: Questions to locate.

    Returns:
        A list of ``{"question": ..., "index": ...}`` dicts sorted by
        ascending ``"index"``. ``"question"`` holds the original,
        un-lowered text.

    Raises:
        Whatever ``find_index`` raises when a question cannot be located.
    """
    haystack = transcript.lower()
    located = [
        {"question": question, "index": find_index(haystack, question.lower())}
        for question in questions
    ]
    # list.sort is stable, so entries with equal indices keep input order.
    located.sort(key=lambda entry: entry["index"])
    return located
def read_transcript(id):
    """Load ``transcripts/<id>.txt`` as a single line of text.

    Args:
        id: Identifier used to build the path ``transcripts/{id}.txt``,
            relative to the current working directory.

    Returns:
        The file's contents with all line breaks removed; the lines are
        joined without any separator.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's default encoding; the results file is written as UTF-8 too.
    with open(f"transcripts/{id}.txt", encoding="utf8") as file:
        return "".join(file.read().splitlines())
def build_results(id):
    """Match the questions for ``id`` against its transcript and save them.

    Reads ``questions/{id}.txt`` (one question per line), locates each
    question in the transcript via ``process_transcript``, prints a report
    to stdout, and writes the sorted matches as JSON to ``results/{id}.txt``.

    Args:
        id: Identifier shared by the transcript, questions and results files.

    Raises:
        OSError: If an input file cannot be read or the results file cannot
            be written.
        Whatever ``process_transcript`` raises for a question that cannot be
        located.
    """
    transcript = read_transcript(id)

    # Read as UTF-8 to match the encoding used for the results file below.
    with open(f"questions/{id}.txt", encoding="utf8") as file:
        stripped = [line.strip() for line in file.read().splitlines()]
    # Strip BEFORE filtering so whitespace-only lines are dropped as well;
    # an empty question has no prefix to search for and would abort the run.
    questions = [question for question in stripped if question]

    results = process_transcript(transcript, questions)

    for res in results:
        print(f'{res["index"]:5} {res["question"]}')

    # Distances between consecutive match positions.
    indices = [res["index"] for res in results]
    gaps = [later - earlier for earlier, later in zip(indices, indices[1:])]
    print(gaps)

    with open(f"results/{id}.txt", "w", encoding="utf8") as outfile:
        json.dump(results, outfile, ensure_ascii=False)
if __name__ == "__main__":
    # Run only when executed as a script, so importing this module does not
    # trigger file processing (and does not shadow the builtin ``id``).
    if len(sys.argv) < 2:
        sys.exit(f"usage: {sys.argv[0]} <id>")
    build_results(int(sys.argv[1]))