From 9710c42403167f57de6a3445810d94ed5291a3f1 Mon Sep 17 00:00:00 2001
From: Albert Zeyer <albert@nnaisense.com>
Date: Thu, 22 Feb 2018 12:19:14 +0100
Subject: [PATCH] patch audioread

---
 GeneratingDataset.py  |  5 +++--
 Util.py               | 14 ++++++++++++++
 tools/dump-dataset.py |  5 +++--
 3 files changed, 20 insertions(+), 4 deletions(-)

diff --git a/GeneratingDataset.py b/GeneratingDataset.py
index 4ec085ce63..c9f0c08cce 100644
--- a/GeneratingDataset.py
+++ b/GeneratingDataset.py
@@ -1524,8 +1524,9 @@ def __init__(self, path, prefix, bpe, audio, partition_epoch=None, fixed_random_
     self.prefix = prefix
     assert prefix in ["train", "dev", "eval"]
     assert os.path.exists(path + "/train-clean-100")
-    from Util import monkeyfix_glib
-    monkeyfix_glib()
+    import Util
+    Util.monkeyfix_glib()
+    Util.monkeypatch_audioread()
     self.bpe = BytePairEncoding(**bpe)
     self.labels = self.bpe.labels
     self._fixed_random_seed = fixed_random_seed
diff --git a/Util.py b/Util.py
index d72ba0febb..a42669fc84 100644
--- a/Util.py
+++ b/Util.py
@@ -2586,3 +2586,17 @@ def monkeyfix_glib():
   # and then reraise a KeyboardInterrupt in that thread.
   # However, we want and expect to get the KeyboardInterrupt in the main thread.
   GLib.MainLoop.__init__ = lambda *args, **kwargs: None
+
+
+def monkeypatch_audioread():
+  """
+  audioread does not behave optimally in some cases.
+  E.g. each call to _ca_available() takes quite long because of the ctypes.util.find_library usage.
+  We call it once here and replace it with a stub that returns that cached result.
+  """
+  try:
+    import audioread
+  except ImportError:
+    return  # audioread is not importable, so there is nothing to patch
+  res = audioread._ca_available()  # evaluate the slow check a single time
+  audioread._ca_available = lambda: res  # later calls return the cached value
diff --git a/tools/dump-dataset.py b/tools/dump-dataset.py
index 337a3d7839..fe0a461a4f 100755
--- a/tools/dump-dataset.py
+++ b/tools/dump-dataset.py
@@ -93,7 +93,8 @@ def dump_dataset(dataset, options):
         num_seqs_s = "~%i" % dataset.estimated_num_seqs
       except TypeError:  # a number is required, not NoneType
         num_seqs_s = "?"
-    progress = "%i/%s (%.02f%%)" % (seq_idx, num_seqs_s, complete_frac * 100)
+    progress_prefix = "%i/%s" % (seq_idx, num_seqs_s)
+    progress = "%s (%.02f%%)" % (progress_prefix, complete_frac * 100)
     if complete_frac > 0:
       total_time_estimated = start_elapsed / complete_frac
       remaining_estimated = total_time_estimated - start_elapsed
@@ -121,7 +122,7 @@ def dump_dataset(dataset, options):
     if stats:
       stats.collect(data)
     if options.type == "null":
-      Util.progress_bar_with_time(complete_frac)
+      Util.progress_bar_with_time(complete_frac, prefix=progress_prefix)
 
     seq_idx += 1