Skip to content

Commit

Permalink
0.92.1
Browse files (browse the repository at this point in the history)
  • Loading branch information
FBurkhardt committed Nov 8, 2024
1 parent 451829a commit e48e415
Show file tree
Hide file tree
Showing 4 changed files with 40 additions and 10 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
Changelog
=========

Version 0.92.1
--------------
* added a speaker distribution plot (pie chart of segments per speaker) to the pyannote segmentation results

Version 0.92.0
--------------
* added first version of automatic speaker prediction/segmentation
Expand Down
2 changes: 1 addition & 1 deletion nkululeko/constants.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
VERSION="0.92.0"
VERSION="0.92.1"
SAMPLING_RATE = 16000
31 changes: 27 additions & 4 deletions nkululeko/plots.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,14 @@
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from scipy import stats
import seaborn as sns
from sklearn.manifold import TSNE

import nkululeko.glob_conf as glob_conf
import nkululeko.utils.stats as su
from nkululeko.reporting.defines import Header
from nkululeko.reporting.report_item import ReportItem
import nkululeko.utils.stats as su
from nkululeko.utils.util import Util


Expand All @@ -30,8 +30,6 @@ def plot_distributions_speaker(self, df):
df_speaker["samplenum"] = df_speaker.shape[0]
df_speakers = pd.concat([df_speakers, df_speaker.head(1)])
# plot the distribution of samples per speaker
# one up because of the runs
fig_dir = self.util.get_path("fig_dir") + "../"
self.util.debug("plotting samples per speaker")
if "gender" in df_speakers:
filename = "samples_value_counts"
Expand Down Expand Up @@ -319,6 +317,31 @@ def plot_durations(self, df, filename, sample_selection, caption=""):
img_path = f"{fig_dir}{filename}_{sample_selection}.{self.format}"
plt.savefig(img_path)
plt.close(fig)
self.util.debug(f"plotted durations to {img_path}")
glob_conf.report.add_item(
ReportItem(
Header.HEADER_EXPLORE,
caption,
title,
img_path,
)
)

def plot_speakers(self, df, sample_selection):
filename = "speakers"
caption = "speakers"
# one up because of the runs
fig_dir = self.util.get_path("fig_dir") + "../"
sns.set_style("whitegrid") # Set style for chart
ax = df["speaker"].value_counts().plot(kind="pie")
title = f"Speaker distr. for {sample_selection} {df.shape[0]}."
ax.set_title(title)
fig = ax.figure
# plt.tight_layout()
img_path = f"{fig_dir}{filename}_{sample_selection}.{self.format}"
plt.savefig(img_path)
plt.close(fig)
self.util.debug(f"plotted speakers to {img_path}")
glob_conf.report.add_item(
ReportItem(
Header.HEADER_EXPLORE,
Expand Down
13 changes: 8 additions & 5 deletions nkululeko/segment.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ def main():
# segment
segmented_file = util.config_val("SEGMENT", "result", "segmented.csv")

segmenter = util.config_val("SEGMENT", "method", "silero")
method = util.config_val("SEGMENT", "method", "silero")
sample_selection = util.config_val("SEGMENT", "sample_selection", "all")
if sample_selection == "all":
df = pd.concat([expr.df_train, expr.df_test])
Expand All @@ -76,19 +76,19 @@ def main():
f"unknown segmentation selection specifier {sample_selection},"
" should be [all | train | test]"
)
util.debug(f"segmenting {sample_selection}: {df.shape[0]} samples with {segmenter}")
if segmenter == "silero":
util.debug(f"segmenting {sample_selection}: {df.shape[0]} samples with {method}")
if method == "silero":
from nkululeko.segmenting.seg_silero import Silero_segmenter

segmenter = Silero_segmenter()
df_seg = segmenter.segment_dataframe(df)
elif segmenter == "pyannote":
elif method == "pyannote":
from nkululeko.segmenting.seg_pyannote import Pyannote_segmenter

segmenter = Pyannote_segmenter(config)
df_seg = segmenter.segment_dataframe(df)
else:
util.error(f"unknown segmenter: {segmenter}")
util.error(f"unknown segmenter: {method}")

def calc_dur(x):
starts = x[1]
Expand All @@ -110,6 +110,9 @@ def calc_dur(x):
plots.plot_durations(
df_seg, "segmented_durations", sample_selection, caption="Segmented durations"
)
if method == "pyannote":
plots.plot_speakers(df_seg, sample_selection)

print("")
# remove encoded labels
target = util.config_val("DATA", "target", None)
Expand Down

0 comments on commit e48e415

Please sign in to comment.