Skip to content

Commit

Permalink
0.92.2
Browse files Browse the repository at this point in the history
  • Loading branch information
FBurkhardt committed Nov 8, 2024
1 parent e48e415 commit d72de9e
Show file tree
Hide file tree
Showing 4 changed files with 40 additions and 26 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
Changelog
=========

Version 0.92.2
--------------
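* added some output to automatic speaker identification (per-speaker total durations in the debug log, percentage labels on the speaker pie chart)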

Version 0.92.1
--------------
* added a speaker plot to pyannote results
Expand Down
2 changes: 1 addition & 1 deletion nkululeko/constants.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
VERSION="0.92.1"
VERSION="0.92.2"
SAMPLING_RATE = 16000
2 changes: 1 addition & 1 deletion nkululeko/plots.py
Original file line number Diff line number Diff line change
Expand Up @@ -333,7 +333,7 @@ def plot_speakers(self, df, sample_selection):
# one up because of the runs
fig_dir = self.util.get_path("fig_dir") + "../"
sns.set_style("whitegrid") # Set style for chart
ax = df["speaker"].value_counts().plot(kind="pie")
ax = df["speaker"].value_counts().plot(kind="pie", autopct="%1.1f%%")
title = f"Speaker distr. for {sample_selection} {df.shape[0]}."
ax.set_title(title)
fig = ax.figure
Expand Down
58 changes: 34 additions & 24 deletions nkululeko/segment.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@

import pandas as pd

import audformat

from nkululeko.constants import VERSION
from nkululeko.experiment import Experiment
import nkululeko.glob_conf as glob_conf
Expand Down Expand Up @@ -76,19 +78,33 @@ def main():
f"unknown segmentation selection specifier {sample_selection},"
" should be [all | train | test]"
)
util.debug(f"segmenting {sample_selection}: {df.shape[0]} samples with {method}")
if method == "silero":
from nkululeko.segmenting.seg_silero import Silero_segmenter

segmenter = Silero_segmenter()
df_seg = segmenter.segment_dataframe(df)
elif method == "pyannote":
from nkululeko.segmenting.seg_pyannote import Pyannote_segmenter

segmenter = Pyannote_segmenter(config)
df_seg = segmenter.segment_dataframe(df)
result_file = f"{expr.data_dir}/{segmented_file}"
if os.path.exists(result_file):
util.debug(f"reusing existing result file: {result_file}")
df_seg = audformat.utils.read_csv(result_file)
else:
util.error(f"unknown segmenter: {method}")
util.debug(
f"segmenting {sample_selection}: {df.shape[0]} samples with {method}"
)
if method == "silero":
from nkululeko.segmenting.seg_silero import Silero_segmenter

segmenter = Silero_segmenter()
df_seg = segmenter.segment_dataframe(df)
elif method == "pyannote":
from nkululeko.segmenting.seg_pyannote import Pyannote_segmenter

segmenter = Pyannote_segmenter(config)
df_seg = segmenter.segment_dataframe(df)
else:
util.error(f"unknown segmenter: {method}")
# remove encoded labels
target = util.config_val("DATA", "target", None)
if "class_label" in df_seg.columns:
df_seg = df_seg.drop(columns=[target])
df_seg = df_seg.rename(columns={"class_label": target})
# save file
df_seg.to_csv(f"{expr.data_dir}/{segmented_file}")

def calc_dur(x):
starts = x[1]
Expand All @@ -100,6 +116,11 @@ def calc_dur(x):
df_seg["duration"] = df_seg.index.to_series().map(lambda x: calc_dur(x))
num_before = df.shape[0]
num_after = df_seg.shape[0]
util.debug(
f"saved {segmented_file} to {expr.data_dir}, {num_after} samples (was"
f" {num_before})"
)

# plot distributions
from nkululeko.plots import Plots

Expand All @@ -111,20 +132,9 @@ def calc_dur(x):
df_seg, "segmented_durations", sample_selection, caption="Segmented durations"
)
if method == "pyannote":
util.debug(df_seg[["speaker", "duration"]].groupby(["speaker"]).sum())
plots.plot_speakers(df_seg, sample_selection)

print("")
# remove encoded labels
target = util.config_val("DATA", "target", None)
if "class_label" in df_seg.columns:
df_seg = df_seg.drop(columns=[target])
df_seg = df_seg.rename(columns={"class_label": target})
# save file
df_seg.to_csv(f"{expr.data_dir}/{segmented_file}")
util.debug(
f"saved {segmented_file} to {expr.data_dir}, {num_after} samples (was"
f" {num_before})"
)
glob_conf.report.add_item(
ReportItem(
"Data",
Expand Down

0 comments on commit d72de9e

Please sign in to comment.