-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathjson_builder.py
44 lines (32 loc) · 1.33 KB
/
json_builder.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
from jinja2 import Markup
from markdown import markdown
import datetime
import json
import logging
import re
import sys
def build_transcript(transcript_json):
    """Turn a channel-labelled transcription result into rendered markup.

    The items in ``results['items']`` are visited in order. Each item is
    matched (by equality) against the ``items`` list of every entry in
    ``results['channel_labels']['channels']`` and, for each channel that
    contains it, contributes one piece of markdown text:

    * a non-punctuation item emits a ``"<speaker>: <start>"`` header first
      whenever its channel's speaker differs from the current one, where
      ``<start>`` is ``start_time`` rounded to the nearest integer;
    * a non-punctuation item whose confidence is below 0.85 is wrapped
      in ``**``;
    * a punctuation item equal to the piece emitted just before it is
      skipped.

    The pieces are joined with single spaces, the space in front of
    ``.``, ``,``, ``?`` and ``!`` is removed, and the text is passed through
    ``markdown()`` and wrapped in ``Markup``.

    Args:
        transcript_json: Mapping with a ``results`` entry that holds
            ``transcripts``, ``channel_labels`` and ``items``.
            (Presumably Amazon Transcribe channel-identification output;
            confirm against the caller.)

    Returns:
        ``Markup`` wrapping the output of ``markdown()``.

    Raises:
        KeyError: If an expected key is missing, or a channel label other
            than ``'ch_0'`` / ``'ch_1'`` is met on a non-punctuation item.
    """
    results = transcript_json['results']
    logging.debug(results['transcripts'][0]['transcript'])
    channel_list = results['channel_labels']['channels']

    speaker_names = {'ch_0': 'speaker 1', 'ch_1': 'speaker 2'}
    current_speaker = speaker_names['ch_0']
    # The output always opens with the first speaker's name (no timestamp).
    pieces = [f'{current_speaker}\n']

    for item in results['items']:
        for channel in channel_list:
            if item not in channel['items']:
                continue

            label = channel['channel_label']
            best_guess = item['alternatives'][0]
            token = best_guess['content']

            if item['type'] == 'punctuation':
                # The same punctuation item can match more than one channel;
                # emit it only once in a row.
                if pieces[-1] == token:
                    continue
            else:
                item_speaker = speaker_names[label]
                if item_speaker != current_speaker:
                    current_speaker = item_speaker
                    started_at = round(float(item['start_time']))
                    pieces.append(f'\n\n{current_speaker}: {started_at}\n')
                # Flag low-confidence words so they stand out once rendered.
                if float(best_guess['confidence']) < 0.85:
                    token = f'**{token}**'

            pieces.append(token)

    joined = ' '.join(pieces)
    # Drop the space that the join put in front of sentence punctuation.
    tidied = re.sub(r' (?=[\.\,\?\!])', '', joined)
    # NOTE(review): this file imports Markup from jinja2; Jinja2 3.1 dropped
    # that re-export in favour of markupsafe.Markup - confirm the pinned
    # Jinja2 version.
    return Markup(markdown(tidied))