Skip to content

Commit

Permalink
0.74.5
Browse files Browse the repository at this point in the history
  • Loading branch information
FBurkhardt committed Dec 15, 2023
1 parent 9024a75 commit 3313d8a
Show file tree
Hide file tree
Showing 7 changed files with 74 additions and 54 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
Changelog
=========

Version 0.74.5
--------------
* added praat feature extractor for one sample

Version 0.74.4
--------------
* fixed bug combining augmentations
Expand Down
2 changes: 1 addition & 1 deletion nkululeko/constants.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
VERSION="0.74.4"
VERSION="0.74.5"
SAMPLING_RATE = 16000
7 changes: 7 additions & 0 deletions nkululeko/demo_predictor.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,13 @@ def run_demo(self):

def predict_signal(self, signal, sr):
features = self.feature_extractor.extract_sample(signal, sr)
scale_feats = self.util.config_val("FEATS", "scale", False)
if scale_feats:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
features = scaler.fit_transform(features)
features = np.nan_to_num(features)
result_dict = self.model.predict_sample(features)
keys = result_dict.keys()
if self.label_encoder is not None:
Expand Down
24 changes: 21 additions & 3 deletions nkululeko/experiment.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,12 +100,14 @@ def load_datasets(self):
dbs = ",".join(list(self.datasets.keys()))
labels = self.util.config_val("DATA", "labels", False)
if labels:
labels = ast.literal_eval(labels)
self.labels = ast.literal_eval(labels)
self.util.debug(f"Target labels (from config): {labels}")
else:
labels = list(next(iter(self.datasets.values())).df[self.target].unique())
self.labels = list(
next(iter(self.datasets.values())).df[self.target].unique()
)
self.util.debug(f"Target labels (from database): {labels}")
glob_conf.set_labels(labels)
glob_conf.set_labels(self.labels)
self.util.debug(f"loaded databases {dbs}")

def _import_csv(self, storage):
Expand Down Expand Up @@ -589,6 +591,7 @@ def run(self):
if save:
# save the experiment for future use
self.save(self.util.get_save_name())
# self.save_onnx(self.util.get_save_name())

# self.__collect_reports()
self.util.print_best_results(self.reports)
Expand Down Expand Up @@ -667,6 +670,7 @@ def load(self, filename):
tmp_dict = pickle.load(f)
f.close()
self.__dict__.update(tmp_dict)
glob_conf.set_labels(self.labels)

def save(self, filename):
try:
Expand All @@ -675,3 +679,17 @@ def save(self, filename):
f.close()
except (AttributeError, TypeError, RuntimeError) as error:
self.util.warn(f"Save experiment: Can't pickle local object: {error}")

def save_onnx(self, filename):
    """Placeholder for exporting the best model to ONNX.

    No ONNX conversion is performed yet: the method only prints which
    conversion path (torch or sklearn) applies to the best model and then
    pickles the attribute dictionary of this object to ``filename``.

    Args:
        filename: path of the file the pickled state is written to.
    """
    # export the model to onnx
    model = self.runmgr.get_best_model()
    if model.is_ANN():
        print("converting to onnx from torch")
    else:
        # to_onnx is imported but not used yet (conversion still to be done)
        from skl2onnx import to_onnx  # noqa: F401

        print("converting to onnx from sklearn")
    # save the rest; the context manager closes the file even if pickling
    # raises, which the former open()/close() pair did not guarantee
    with open(filename, "wb") as f:
        pickle.dump(self.__dict__, f)
62 changes: 24 additions & 38 deletions nkululeko/feat_extract/feats_praat.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from nkululeko.feat_extract.featureset import Featureset
import os
import pandas as pd
import numpy as np
import nkululeko.glob_conf as glob_conf
from nkululeko.feat_extract import feinberg_praat
import ast
Expand All @@ -23,14 +24,10 @@ def extract(self):
store = self.util.get_path("store")
store_format = self.util.config_val("FEATS", "store_format", "pkl")
storage = f"{store}{self.name}.{store_format}"
extract = self.util.config_val(
"FEATS", "needs_feature_extraction", False
)
extract = self.util.config_val("FEATS", "needs_feature_extraction", False)
no_reuse = eval(self.util.config_val("FEATS", "no_reuse", "False"))
if extract or no_reuse or not os.path.isfile(storage):
self.util.debug(
"extracting Praat features, this might take a while..."
)
self.util.debug("extracting Praat features, this might take a while...")
self.df = feinberg_praat.compute_features(self.data_df.index)
self.df = self.df.set_index(self.data_df.index)
for i, col in enumerate(self.df.columns):
Expand All @@ -53,36 +50,25 @@ def extract(self):
self.df = self.df.astype(float)

def extract_sample(self, signal, sr):
    """Extract Praat features for one audio signal.

    The signal is written to a temporary wav file because
    ``feinberg_praat.compute_features`` is called with a file index rather
    than with raw samples.

    Args:
        signal: audio samples, handed to ``audiofile.write``.
        sr: sampling rate of ``signal``, handed to ``audiofile.write``.

    Returns:
        The computed features converted to float and returned via
        ``DataFrame.to_numpy()``.
    """
    import tempfile

    import audformat
    import audiofile

    # Write the sample into a private temporary directory instead of a fixed
    # file name in the working directory: parallel calls no longer overwrite
    # each other and the file is removed once the features are computed.
    with tempfile.TemporaryDirectory() as tmp_dir:
        tmp_audio_names = [os.path.join(tmp_dir, "praat_audio_tmp.wav")]
        audiofile.write(tmp_audio_names[0], signal, sr)
        df = pd.DataFrame(index=tmp_audio_names)
        # allow_nat=False is requested while the file still exists
        index = audformat.utils.to_segmented_index(df.index, allow_nat=False)
        df = feinberg_praat.compute_features(index)
    # set_index returns a new frame; the result must be assigned to take effect
    df = df.set_index(index)
    for col in df.columns:
        if df[col].isnull().values.any():
            self.util.debug(
                f"{col} includes {df[col].isnull().sum()} nan,"
                " inserting mean values"
            )
            mean_val = df[col].mean()
            # a column consisting only of NaN has no usable mean: fall back to 0
            if not np.isnan(mean_val):
                df[col] = df[col].fillna(mean_val)
            else:
                df[col] = df[col].fillna(0)
    df = df.astype(float)
    feats = df.to_numpy()
    return feats
26 changes: 14 additions & 12 deletions nkululeko/feat_extract/feinberg_praat.py
Original file line number Diff line number Diff line change
Expand Up @@ -199,28 +199,30 @@ def runPCA(df):
# pickle.dump(x, f)
# f.close()

x = StandardScaler().fit_transform(x)
if np.any(np.isnan(x)):
# x = StandardScaler().fit_transform(x)
if np.any(np.isnan(x[0])):
print(
f"Warning: {np.count_nonzero(np.isnan(x))} Nans in x, replacing" " with 0"
)
x[np.isnan(x)] = 0
if np.any(np.isfinite(x)):
print(f"Warning: {np.count_nonzero(np.isfinite(x))} infinite in x")
# if np.any(np.isfinite(x[0])):
# print(f"Warning: {np.count_nonzero(np.isfinite(x))} finite in x")

# PCA
pca = PCA(n_components=2)
principalComponents = pca.fit_transform(x)
if np.any(np.isnan(principalComponents)):
print("pc is nan")
print(f"count: {np.count_nonzero(np.isnan(principalComponents))}")
print(principalComponents)
principalComponents = np.nan_to_num(principalComponents)

try:
principalComponents = pca.fit_transform(x)
if np.any(np.isnan(principalComponents)):
print("pc is nan")
print(f"count: {np.count_nonzero(np.isnan(principalComponents))}")
print(principalComponents)
principalComponents = np.nan_to_num(principalComponents)
except ValueError:
print("need more than one file for pca")
principalComponents = [[0, 0]]
principalDf = pd.DataFrame(
data=principalComponents, columns=["JitterPCA", "ShimmerPCA"]
)

return principalDf


Expand Down
3 changes: 3 additions & 0 deletions nkululeko/models/model_mlp.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,9 @@ def predict_sample(self, features):
def store(self):
torch.save(self.model.state_dict(), self.store_path)

def store_as_onnx(self):
    # Placeholder: no ONNX export is performed here, the method currently
    # does nothing and returns None.
    pass

def load(self, run, epoch):
self.set_id(run, epoch)
dir = self.util.get_path("model_dir")
Expand Down

0 comments on commit 3313d8a

Please sign in to comment.