# search.py

"""NIPS2017 "Time Domain Neural Audio Style Transfer" code repository
Parag K. Mital
"""
import os
import glob
import numpy as np
from audio_style_transfer.models import timedomain, uylanov


def get_path(model, output_path, content_filename, style_filename):
    """Build the output filename prefix for a content/style pair.

    Ensures the directory ``<output_path>/<model>`` exists and returns the
    string ``'<output_path>/<model>/<content basename>+<style basename>'``.
    No extension is appended here; callers add their own suffix.

    Parameters
    ----------
    model : str
        Sub-directory (may contain ``/``) under ``output_path``.
    output_path : str
        Root directory for results.
    content_filename : str
        Path of the content file; only its basename is used.
    style_filename : str
        Path of the style file; only its basename is used.

    Returns
    -------
    str
        The filename prefix described above.
    """
    output_dir = os.path.join(output_path, model)
    # exist_ok avoids the check-then-create race of
    # ``if not exists: makedirs`` when several searches run concurrently.
    os.makedirs(output_dir, exist_ok=True)
    # basename() honours the platform's path separators, unlike split('/').
    return '{}/{}/{}+{}'.format(output_path, model,
                                os.path.basename(content_filename),
                                os.path.basename(style_filename))


def params():
    """Return the hyperparameter search grid.

    The result maps each hyperparameter name to the list of candidate
    values that may be sampled for it. A new dict (holding new lists) is
    built on every call, so callers may mutate the result freely.
    """
    grid = {
        # NOTE(review): 8196 is not a power of two; possibly meant 8192 --
        # confirm before changing, since the value appears in output names.
        'n_fft': [2048, 4096, 8196],
        'n_layers': [1, 2, 4],
        'n_filters': [128, 2048, 4096],
        'hop_length': [128, 256, 512],
        'alpha': [0.1, 0.01, 0.005],
        'k_w': [4, 8, 12],
        'norm': [True, False],
        'input_features': [
            ['mags'],
            ['mags', 'phase'],
            ['real', 'imag'],
            ['real', 'imag', 'mags'],
        ],
    }
    return grid


def batch(content_path, style_path, output_path, run_timedomain=True, run_uylanov=False):
    """Run one randomly sampled style-transfer configuration.

    Picks a random ``.wav`` file from ``content_path`` and from
    ``style_path``, samples one value per hyperparameter from ``params()``,
    and runs the selected model(s). Output filenames encode the sampled
    hyperparameters; a run is skipped when its output file already exists.

    Parameters
    ----------
    content_path : str
        Directory searched (non-recursively) for content ``*.wav`` files.
    style_path : str
        Directory searched (non-recursively) for style ``*.wav`` files.
    output_path : str
        Root directory under which results are written.
    run_timedomain : bool
        If true, run ``timedomain.run`` once per entry of
        ``params()['input_features']``.
    run_uylanov : bool
        If true, run ``uylanov.run`` once.

    Raises
    ------
    ValueError
        If either directory contains no ``.wav`` files.
    """
    # glob returns matches in arbitrary order; sorting makes the random pick
    # reproducible when the NumPy RNG is seeded.
    content_files = sorted(glob.glob('{}/*.wav'.format(content_path)))
    style_files = sorted(glob.glob('{}/*.wav'.format(style_path)))
    # Fail with a message naming the directory instead of the opaque error
    # np.random.choice raises on an empty sequence.
    if not content_files:
        raise ValueError(
            'No .wav files found in content path: {}'.format(content_path))
    if not style_files:
        raise ValueError(
            'No .wav files found in style path: {}'.format(style_path))

    # Build the grid once rather than once per sampled hyperparameter.
    grid = params()

    # The draw order is kept stable so seeded runs consume the RNG in a
    # fixed sequence.
    content_filename = np.random.choice(content_files)
    style_filename = np.random.choice(style_files)
    alpha = np.random.choice(grid['alpha'])
    n_fft = np.random.choice(grid['n_fft'])
    n_layers = np.random.choice(grid['n_layers'])
    n_filters = np.random.choice(grid['n_filters'])
    hop_length = np.random.choice(grid['hop_length'])
    norm = np.random.choice(grid['norm'])
    k_w = np.random.choice(grid['k_w'])

    # Run the Time Domain Model
    if run_timedomain:
        for f in grid['input_features']:
            fname = get_path('timedomain/input_features={}'.format(",".join(f)),
                             output_path, content_filename, style_filename)
            output_filename = ('{},n_fft={},n_layers={},n_filters={},norm={},'
                               'hop_length={},alpha={},k_w={}.wav'.format(
                                   fname, n_fft, n_layers, n_filters, norm,
                                   hop_length, alpha, k_w))
            print(output_filename)
            if not os.path.exists(output_filename):
                # Pass the feature set through; without it every iteration
                # runs the same configuration and only the filename differs.
                # NOTE(review): assumes timedomain.run accepts an
                # ``input_features`` keyword -- confirm against its signature.
                timedomain.run(content_fname=content_filename,
                               style_fname=style_filename,
                               output_fname=output_filename,
                               input_features=f,
                               n_fft=n_fft,
                               n_layers=n_layers,
                               n_filters=n_filters,
                               hop_length=hop_length,
                               alpha=alpha,
                               norm=norm,
                               k_w=k_w)

    if run_uylanov:
        # Run Original Uylanov Model
        fname = get_path('uylanov', output_path, content_filename, style_filename)
        output_filename = ('{},n_fft={},n_layers={},n_filters={},'
                           'hop_length={},alpha={},k_w={}.wav'.format(
                               fname, n_fft, n_layers, n_filters, hop_length, alpha,
                               k_w))
        print(output_filename)
        if not os.path.exists(output_filename):
            uylanov.run(content_filename,
                        style_filename,
                        output_filename,
                        n_fft=n_fft,
                        n_layers=n_layers,
                        n_filters=n_filters,
                        hop_length=hop_length,
                        alpha=alpha,
                        k_w=k_w)

    # These only produce noise so they are commented
    # # Run NSynth Encoder Model
    # output_filename = get_path('nsynth-encoder', output_path, content_filename,
    #                            style_filename)
    # output_filename = ('{},n_fft={},n_layers={},n_filters={},'
    #                    'hop_length={},alpha={},k_w={}.wav'.format(
    #                        fname, n_fft, n_layers, n_filters, hop_length, alpha, k_w))
    # if not os.path.exists(output_filename):
    #     nsynth.run(content_filename,
    #                style_filename,
    #                output_filename,
    #                model='encoder',
    #                n_fft=n_fft,
    #                n_layers=n_layers,
    #                n_filters=n_filters,
    #                hop_length=hop_length,
    #                alpha=alpha,
    #                k_w=k_w)
    # # Run NSynth Decoder Model
    # output_filename = get_path('wavenet-decoder', output_path, content_filename,
    #                            style_filename)
    # output_filename = ('{},n_fft={},n_layers={},n_filters={},'
    #                    'hop_length={},alpha={},k_w={}.wav'.format(
    #                        fname, n_fft, n_layers, n_filters, hop_length, alpha, k_w))
    # if not os.path.exists(output_filename):
    #     nsynth.run(content_filename,
    #                style_filename,
    #                output_filename,
    #                model='decoder',
    #                n_fft=n_fft,
    #                n_layers=n_layers,
    #                n_filters=n_filters,
    #                hop_length=hop_length,
    #                alpha=alpha,
    #                k_w=k_w)


if __name__ == '__main__':
    # Script entry point: run a single random search step using the
    # default directory layout relative to the current working directory.
    batch(content_path='./target',
          style_path='./corpus',
          output_path='./results')