-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathanalyzer.py
executable file
·105 lines (94 loc) · 3.03 KB
/
analyzer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
#!/usr/bin/env python
"""A script to extract the following acoustic features from each .wav file in the current directory:
- Pitch (minimum, maximum, and average)
- Intensity (minimum, maximum, and average)
- Jitter
- Shimmer
- HNR (harmonics-to-noise ratio)
- Estimated speaking rate
"""
# imports
import glob2
import parselmouth
from parselmouth.praat import call
from tabulate import tabulate
# Collect the paths of every .wav file matched by the relative pattern
# "*.wav". This runs at import time, so the list is fixed before main()
# is called; the audio itself is only opened later, inside main().
sound_files = glob2.glob("*.wav")
# gets the correct transcript
def get_transcript(path: str, file_name: str) -> str:
    """Look up the transcript recorded for one audio file.

    The transcript file is scanned line by line; each useful line has the
    form ``<name>:<transcript>``. Lines without a colon are ignored.

    Args:
        path: Location of the transcript file.
        file_name: Name to look up. It is compared with the text before
            the first colon of each line, ignoring case and surrounding
            whitespace.

    Returns:
        Everything after the first colon of the matching line, returned
        as read (it may carry a leading space and the trailing newline).
        When several lines match, the last one wins. An empty string is
        returned when no line matches.

    Raises:
        OSError: If the transcript file cannot be opened.
    """
    wanted = file_name.strip().lower()
    transcription = ""
    with open(path, "r") as source:
        for line in source:
            # partition() cuts at the first colon only, so a transcript
            # that itself contains colons is kept whole instead of being
            # truncated at its second colon.
            name, separator, text = line.partition(":")
            # An empty separator means the line had no colon at all;
            # skipping it avoids indexing into a one-element split.
            if separator and name.strip().lower() == wanted:
                transcription = text
    return transcription
def main():
    """Measure every discovered sound file and print one table of results.

    For each path in the module-level ``sound_files`` list the sound is
    opened with Parselmouth and the following values are computed through
    Praat commands: minimum/maximum/mean pitch, minimum/maximum/mean
    intensity, local jitter, local shimmer, mean harmonicity (HNR) and a
    speaking rate (words in the file's transcript divided by the sound's
    duration). Each value is rounded to three decimals and the rows are
    printed with ``tabulate``.

    The transcript for a file is looked up in ``transcripts.txt`` under
    the file's name without its extension; that same name fills the
    first column (headed "Emotion").
    """
    first_row = [
        "Emotion",
        "Min F0",
        "Max F0",
        "Mean F0",
        "Min Int",
        "Max Int",
        "Mean Int",
        "Jitter",
        "Shimmer",
        "HNR",
        "Speaking Rate",
    ]
    table = []
    for file in sound_files:
        # Remove only the final extension so that names containing dots
        # (e.g. "take.2.wav") keep their full stem for the lookup below.
        file_name = file.rsplit(".", 1)[0]
        input_sound = parselmouth.Sound(file)

        # Total duration; reused as the end of the pitch query window and
        # as the denominator of the speaking rate.
        duration = input_sound.get_total_duration()

        # Pitch metrics over the whole file. The 75.0 / 600.0 arguments
        # are passed to Praat's "To Pitch" command (presumably the pitch
        # floor and ceiling in Hz; see the Praat manual).
        pitch = call(input_sound, "To Pitch", 0.0, 75.0, 600.0)
        minF0 = call(pitch, "Get minimum", 0.0, duration, "Hertz", "Parabolic")
        maxF0 = call(pitch, "Get maximum", 0.0, duration, "Hertz", "Parabolic")
        avgF0 = call(pitch, "Get mean", 0.0, duration, "Hertz")

        # Intensity metrics.
        intensity = call(input_sound, "To Intensity", 75.0, 0.0)
        min_intensity = intensity.get_minimum()
        max_intensity = intensity.get_maximum()
        avg_intensity = intensity.get_average()

        # Jitter is measured on a point process derived from the sound;
        # the same point process is reused for shimmer.
        point_process = call(
            input_sound, "To PointProcess (periodic, cc)", 75.0, 600.0
        )
        jitter = call(
            point_process, "Get jitter (local)", 0.0, 0.0, 0.0001, 0.02, 1.3
        )

        # Shimmer needs both the sound and its point process.
        shimmer = call(
            [input_sound, point_process],
            "Get shimmer (local)",
            0,
            0,
            0.0001,
            0.02,
            1.3,
            1.6,
        )

        # Harmonics-to-noise ratio: mean of the harmonicity contour.
        harmonicity = call(
            input_sound, "To Harmonicity (cc)", 0.01, 75.0, 0.1, 1.0
        )
        hnr = call(harmonicity, "Get mean", 0, 0)

        # Speaking rate: whitespace-separated words in the transcript
        # divided by the duration. A file with no transcript entry yields
        # an empty string and therefore a rate of 0.
        transcript = get_transcript("transcripts.txt", file_name)
        num_words = len(transcript.split())
        speaking_rate = num_words / duration

        # One row per file: the name followed by the rounded measurements,
        # in the same order as the headers in first_row.
        measurements = (
            minF0,
            maxF0,
            avgF0,
            min_intensity,
            max_intensity,
            avg_intensity,
            jitter,
            shimmer,
            hnr,
            speaking_rate,
        )
        table.append([file_name] + [round(value, 3) for value in measurements])

    # Print all rows at once, after every file has been processed.
    print(tabulate(table, headers=first_row))
# Run the analysis only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()