Remove mandatory dependency on pandas #3133

Closed
9 changes: 6 additions & 3 deletions TTS/encoder/utils/prepare_voxceleb.py
@@ -19,13 +19,13 @@
 # pylint: disable=too-many-locals, too-many-statements, too-many-arguments, too-many-instance-attributes
 """ voxceleb 1 & 2 """

+import csv
 import hashlib
 import os
 import subprocess
 import sys
 import zipfile

-import pandas
 import soundfile as sf
 from absl import logging

@@ -185,8 +185,11 @@ def convert_audio_and_make_label(input_dir, subset, output_dir, output_file):
     # Write to CSV file which contains four columns:
     # "wav_filename", "wav_length_ms", "speaker_id", "speaker_name".
     csv_file_path = os.path.join(output_dir, output_file)
-    df = pandas.DataFrame(data=files, columns=["wav_filename", "wav_length_ms", "speaker_id", "speaker_name"])
-    df.to_csv(csv_file_path, index=False, sep="\t")
+    with open(csv_file_path, "w", newline="", encoding="utf-8") as f:
+        writer = csv.writer(f, delimiter="\t")
+        writer.writerow(["wav_filename", "wav_length_ms", "speaker_id", "speaker_name"])
+        for wav_file in files:
+            writer.writerow(wav_file)
     logging.info("Successfully generated csv file {}".format(csv_file_path))

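
For reference, a minimal stand-alone sketch of the same stdlib-csv pattern: it writes the tab-separated header and rows that the removed pandas.DataFrame.to_csv(index=False, sep="\t") call produced. The files list and output filename here are hypothetical, only the column order follows convert_audio_and_make_label.

import csv

# Hypothetical rows in (wav_filename, wav_length_ms, speaker_id, speaker_name) order,
# mirroring what convert_audio_and_make_label collects in `files`.
files = [
    ("wav/id10001/utt_00001.wav", 8120, "id10001", "speaker_a"),
    ("wav/id10002/utt_00007.wav", 5430, "id10002", "speaker_b"),
]

with open("voxceleb_meta.csv", "w", newline="", encoding="utf-8") as f:
    writer = csv.writer(f, delimiter="\t")
    # Header row first, then one row per utterance -- same layout as the old DataFrame output.
    writer.writerow(["wav_filename", "wav_length_ms", "speaker_id", "speaker_name"])
    writer.writerows(files)
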
46 changes: 25 additions & 21 deletions TTS/tts/datasets/formatters.py
@@ -1,11 +1,11 @@
+import csv
 import os
 import re
 import xml.etree.ElementTree as ET
 from glob import glob
 from pathlib import Path
 from typing import List

-import pandas as pd
 from tqdm import tqdm

 ########################
@@ -25,25 +25,27 @@ def cml_tts(root_path, meta_file, ignored_speakers=None):
             if len(line.split("|")) != num_cols:
                 print(f" > Missing column in line {idx + 1} -> {line.strip()}")
     # load metadata
-    metadata = pd.read_csv(os.path.join(root_path, meta_file), sep="|")
-    assert all(x in metadata.columns for x in ["wav_filename", "transcript"])
-    client_id = None if "client_id" in metadata.columns else "default"
-    emotion_name = None if "emotion_name" in metadata.columns else "neutral"
+    with open(Path(root_path) / meta_file, newline="", encoding="utf-8") as f:
+        reader = csv.DictReader(f, delimiter="|")
+        metadata = list(reader)
+    assert all(x in metadata[0] for x in ["wav_filename", "transcript"])
+    client_id = None if "client_id" in metadata[0] else "default"
+    emotion_name = None if "emotion_name" in metadata[0] else "neutral"
     items = []
     not_found_counter = 0
-    for row in metadata.itertuples():
-        if client_id is None and ignored_speakers is not None and row.client_id in ignored_speakers:
+    for row in metadata:
+        if client_id is None and ignored_speakers is not None and row["client_id"] in ignored_speakers:
             continue
-        audio_path = os.path.join(root_path, row.wav_filename)
+        audio_path = os.path.join(root_path, row["wav_filename"])
         if not os.path.exists(audio_path):
             not_found_counter += 1
             continue
         items.append(
             {
-                "text": row.transcript,
+                "text": row["transcript"],
                 "audio_file": audio_path,
-                "speaker_name": client_id if client_id is not None else row.client_id,
-                "emotion_name": emotion_name if emotion_name is not None else row.emotion_name,
+                "speaker_name": client_id if client_id is not None else row["client_id"],
+                "emotion_name": emotion_name if emotion_name is not None else row["emotion_name"],
                 "root_path": root_path,
             }
         )
@@ -63,25 +65,27 @@ def coqui(root_path, meta_file, ignored_speakers=None):
             if len(line.split("|")) != num_cols:
                 print(f" > Missing column in line {idx + 1} -> {line.strip()}")
     # load metadata
-    metadata = pd.read_csv(os.path.join(root_path, meta_file), sep="|")
-    assert all(x in metadata.columns for x in ["audio_file", "text"])
-    speaker_name = None if "speaker_name" in metadata.columns else "coqui"
-    emotion_name = None if "emotion_name" in metadata.columns else "neutral"
+    with open(Path(root_path) / meta_file, newline="", encoding="utf-8") as f:
+        reader = csv.DictReader(f, delimiter="|")
+        metadata = list(reader)
+    assert all(x in metadata[0] for x in ["audio_file", "text"])
+    speaker_name = None if "speaker_name" in metadata[0] else "coqui"
+    emotion_name = None if "emotion_name" in metadata[0] else "neutral"
     items = []
     not_found_counter = 0
-    for row in metadata.itertuples():
-        if speaker_name is None and ignored_speakers is not None and row.speaker_name in ignored_speakers:
+    for row in metadata:
+        if speaker_name is None and ignored_speakers is not None and row["speaker_name"] in ignored_speakers:
            continue
-        audio_path = os.path.join(root_path, row.audio_file)
+        audio_path = os.path.join(root_path, row["audio_file"])
         if not os.path.exists(audio_path):
             not_found_counter += 1
             continue
         items.append(
             {
-                "text": row.text,
+                "text": row["text"],
                 "audio_file": audio_path,
-                "speaker_name": speaker_name if speaker_name is not None else row.speaker_name,
-                "emotion_name": emotion_name if emotion_name is not None else row.emotion_name,
+                "speaker_name": speaker_name if speaker_name is not None else row["speaker_name"],
+                "emotion_name": emotion_name if emotion_name is not None else row["emotion_name"],
                 "root_path": root_path,
             }
         )
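
For reference, a minimal sketch of the csv.DictReader pattern both formatters switch to, run against a hypothetical pipe-delimited metadata file; the root_path and meta_file values are placeholders, and the column names follow the coqui formatter above.

import csv
from pathlib import Path

root_path = Path("my_dataset")   # hypothetical dataset root
meta_file = "metadata.csv"       # hypothetical pipe-delimited metadata file

# DictReader takes the first row as the header, so each row supports the same
# key-based access used in the new formatters: row["audio_file"], row["text"], ...
with open(root_path / meta_file, newline="", encoding="utf-8") as f:
    reader = csv.DictReader(f, delimiter="|")
    metadata = list(reader)

assert all(col in metadata[0] for col in ["audio_file", "text"])
for row in metadata:
    print(row["audio_file"], row["text"][:40])

Unlike pd.read_csv, DictReader keeps every field as a string, which is sufficient here because the formatters only join paths and pass the text through.
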
3 changes: 2 additions & 1 deletion requirements.notebooks.txt
@@ -1 +1,2 @@
-bokeh==1.4.0
+bokeh==1.4.0
+pandas>=1.4,<2.0
4 changes: 1 addition & 3 deletions requirements.txt
@@ -8,8 +8,7 @@ torchaudio
 soundfile==0.12.*
 librosa==0.10.*
 scikit-learn==1.3.0
-numba==0.55.1;python_version<"3.9"
-numba==0.57.0;python_version>="3.9"
+numba==0.57.0
 inflect==5.6.*
 tqdm==4.64.*
 anyascii==0.3.*
@@ -23,7 +22,6 @@ flask==2.*
 pysbd==0.3.4
 # deps for notebooks
 umap-learn==0.5.*
-pandas>=1.4,<2.0
 # deps for training
 matplotlib==3.7.*
 # coqui stack