-
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathnote-taker.py
121 lines (97 loc) · 3.4 KB
/
note-taker.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
import pyaudio
import wave
import subprocess
import datetime
import threading
from pynput import keyboard
from openai import OpenAI
import dotenv
# Load variables from a .env file before the OpenAI client is constructed
# (presumably so the API key can be supplied that way -- confirm against
# the deployment setup).
dotenv.load_dotenv()
# Shared OpenAI client used by summarize_text().
client = OpenAI()
# Audio recording parameters
# Sample format handed to PyAudio and used to derive the WAV sample width.
FORMAT = pyaudio.paInt16
# Number of audio channels opened on the input stream and written to the WAV.
CHANNELS = 1
# Sample rate in Hz for both the input stream and the WAV header.
RATE = 16000
# Frames requested per stream read (also the stream's frames_per_buffer).
CHUNK = 1024
# Flag polled by record_audio(); on_press() sets it to True when ESC is pressed.
STOP_RECORDING = False
def list_audio_devices():
    """Print and return the audio devices exposed by host API 0.

    Each device is printed as ``<index>: <name>``, where the index is the
    device's position within host API 0.

    Returns:
        list: The device-info mappings returned by PyAudio, in index order.
    """
    p = pyaudio.PyAudio()
    try:
        host_api_info = p.get_host_api_info_by_index(0)
        devices = []
        for i in range(host_api_info.get('deviceCount')):
            device_info = p.get_device_info_by_host_api_device_index(0, i)
            devices.append(device_info)
            print(f"{i}: {device_info.get('name')}")
        return devices
    finally:
        # Release the PyAudio instance even if one of the queries raises.
        p.terminate()
def get_timestamped_filename():
    """Return a WAV file name stamped with the current local date and time.

    The name has the form ``output_YYYYMMDD_HHMMSS.wav``.
    """
    now = datetime.datetime.now()
    # A datetime format spec inside an f-string is applied via strftime.
    return f"output_{now:%Y%m%d_%H%M%S}.wav"
def record_audio(device_index, filename):
    """Record from an input device into a WAV file until recording is stopped.

    Audio is read CHUNK frames at a time and appended to ``filename`` for as
    long as the module-level STOP_RECORDING flag is false.

    Args:
        device_index: Index of the input device, passed to PyAudio as
            ``input_device_index``.
        filename: Path of the WAV file to create.

    Returns:
        The ``filename`` that was written.
    """
    audio = pyaudio.PyAudio()
    try:
        stream = audio.open(format=FORMAT, channels=CHANNELS,
                            rate=RATE, input=True,
                            input_device_index=device_index,
                            frames_per_buffer=CHUNK)
        try:
            print("Recording... Press 'ESC' to stop.")
            with wave.open(filename, 'wb') as wf:
                wf.setnchannels(CHANNELS)
                wf.setsampwidth(audio.get_sample_size(FORMAT))
                wf.setframerate(RATE)
                # STOP_RECORDING is only read here, so no `global` is needed.
                while not STOP_RECORDING:
                    wf.writeframes(stream.read(CHUNK))
        finally:
            # Always release the stream, even if opening the WAV file or a
            # read fails part-way through the recording.
            stream.stop_stream()
            stream.close()
    finally:
        # Always release the PyAudio instance, including when open() fails.
        audio.terminate()
    print(f"Finished recording. File saved as {filename}")
    return filename
def on_press(key):
    """Keyboard callback: request that recording stop when ESC is pressed.

    Args:
        key: The key object delivered by the keyboard listener.

    Returns:
        False when ESC was pressed (which stops the listener), otherwise None.
    """
    global STOP_RECORDING
    # Comparing against Key.esc touches no attribute of `key`, so the
    # original `except AttributeError` guard could never fire and is removed.
    if key == keyboard.Key.esc:
        STOP_RECORDING = True
        return False  # Stop listener
    return None
def listen_for_keypress():
    """Run a keyboard listener with on_press as its callback and block
    until that listener has finished."""
    esc_listener = keyboard.Listener(on_press=on_press)
    with esc_listener:
        esc_listener.join()
def convert_audio_to_text(filename):
    """Transcribe an audio file with the local whisper.cpp binary.

    Runs ``./whisper.cpp/main`` with the ``ggml-medium.en`` model and the
    ``--output-txt`` flag, then reads back ``<filename>.txt``.

    Args:
        filename: Path of the audio file, relative to the current directory.

    Returns:
        str: The contents of the generated transcript file.

    Raises:
        FileNotFoundError: If the whisper.cpp binary or the transcript file
            does not exist.
        subprocess.CalledProcessError: If whisper.cpp exits with a non-zero
            status.
    """
    # An argument list (no shell) keeps unusual characters in `filename`
    # from being interpreted by a shell.
    command = [
        "./whisper.cpp/main",
        "--model", "./whisper.cpp/models/ggml-medium.en.bin",
        f"./{filename}",
        "--output-txt",
    ]
    # The original captured and discarded stdout; DEVNULL does the same
    # without buffering it. check=True surfaces a failed transcription here
    # instead of as a missing-file error below.
    subprocess.run(command, stdout=subprocess.DEVNULL, check=True)
    # read in the output file - it is the filename + ".txt"
    with open(f"{filename}.txt", "r", encoding='utf8') as f:
        return f.read()
def summarize_text(text):
    """Ask the chat model to turn a meeting transcript into review notes.

    Args:
        text: The meeting transcript to summarize.

    Returns:
        The message content of the first choice in the completion response.
    """
    request_body = f"""The following is a transcript from a meeting. Turn this into some useful notes that I can use to refresh my memory before the next meeting.
{text}
"""
    response = client.chat.completions.create(
        model="gpt-4o",
        messages=[{'role': 'user', 'content': request_body}],
    )
    first_choice = response.choices[0]
    return first_choice.message.content
def main():
    """Record a meeting, transcribe it, and save a generated summary.

    Lists the audio devices, asks the user to pick one, records until ESC is
    pressed, transcribes the recording, prints the transcript and summary,
    and writes the summary to ``<recording>.summary.md``.

    Raises:
        SystemExit: If no audio devices are available to choose from.
    """
    global STOP_RECORDING
    STOP_RECORDING = False

    devices = list_audio_devices()
    if not devices:
        raise SystemExit("No audio devices found.")

    # Keep asking until the input is a number that matches a listed device,
    # rather than crashing on a typo or starting a recording that cannot work.
    while True:
        raw = input("Select the device index to use for recording: ")
        try:
            device_index = int(raw)
        except ValueError:
            print(f"'{raw}' is not a number.")
            continue
        if 0 <= device_index < len(devices):
            break
        print(f"Please enter an index between 0 and {len(devices) - 1}.")

    filename = get_timestamped_filename()

    record_thread = threading.Thread(target=record_audio, args=(device_index, filename))
    keypress_thread = threading.Thread(target=listen_for_keypress)

    record_thread.start()
    keypress_thread.start()

    record_thread.join()
    keypress_thread.join()

    text = convert_audio_to_text(filename)
    print(f"Transcribed Text: {text}")

    summary = summarize_text(text)
    print(f"Meeting Summary:\n\n {summary}")

    # write the summary to a file
    # Explicit UTF-8 matches how the transcript is read and avoids encoding
    # errors for non-ASCII text on platforms with a different default.
    with open(f"{filename}.summary.md", "w", encoding="utf-8") as f:
        f.write(summary)


if __name__ == "__main__":
    main()