Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@

*.pyc
*.pt
waveglow
Empty file added config/__init__.py
Empty file.
74 changes: 74 additions & 0 deletions config/default.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
# Default hyperparameters. This file is parsed with PyYAML and exposed as
# attribute-style settings (e.g. hparams.cudnn_enabled).
epochs: 500
iters_per_checkpoint: 1000
seed: 1234
dynamic_loss_scaling: True
fp16_run: False
distributed_run: False
dist_backend: "nccl"
dist_url: "tcp://localhost:54321"
cudnn_enabled: True
cudnn_benchmark: False
ignore_layers: ['embedding.weight']

################################
# Data Parameters #
################################
load_mel_from_disk: False
training_files: 'filelists/ljs_audio_text_train_filelist.txt'
validation_files: 'filelists/ljs_audio_text_val_filelist.txt'
text_cleaners: ['english_cleaners']

################################
# Audio Parameters #
################################
max_wav_value: 32768.0
sampling_rate: 22050
filter_length: 1024
hop_length: 256
win_length: 1024
n_mel_channels: 80
mel_fmin: 0.0
mel_fmax: 8000.0

################################
# Model Parameters #
################################
n_symbols: 148
symbols_embedding_dim: 512

# Encoder parameters
encoder_kernel_size: 5
encoder_n_convolutions: 3
encoder_embedding_dim: 512

# Decoder parameters
n_frames_per_step: 1 # currently only 1 is supported
decoder_rnn_dim: 1024
prenet_dim: 256
max_decoder_steps: 1000
gate_threshold: 0.5
p_attention_dropout: 0.1
p_decoder_dropout: 0.1

# Attention parameters
attention_rnn_dim: 1024
attention_dim: 128

# Location Layer parameters
attention_location_n_filters: 32
attention_location_kernel_size: 31

# Mel-post processing network parameters
postnet_embedding_dim: 512
postnet_kernel_size: 5
postnet_n_convolutions: 5

################################
# Optimization Hyperparameters #
################################
use_saved_learning_rate: False
# NOTE: PyYAML (YAML 1.1) only resolves exponent notation as a float when the
# mantissa contains a decimal point. A bare `1e-3` is loaded as the string
# '1e-3', so the values below are written as `1.0e-3` / `1.0e-6`.
learning_rate: 1.0e-3
weight_decay: 1.0e-6
grad_clip_thresh: 1.0
batch_size: 64
mask_padding: True # set model's padded outputs to padded values
65 changes: 65 additions & 0 deletions config/hparams.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
import os
import yaml


def load_hparam_str(hp_str):
    """Build an ``HParam`` object from YAML text held in a string.

    ``HParam`` is constructed from a file path, so the text is written to a
    temporary file, parsed from there, and the file is removed afterwards.

    Args:
        hp_str: YAML content as a single string.

    Returns:
        The ``HParam`` instance parsed from ``hp_str``.
    """
    import tempfile  # function-scope import; only this helper needs it

    # A unique temporary file (instead of a fixed name in the working
    # directory) keeps concurrent processes from overwriting or deleting
    # each other's file.
    fd, path = tempfile.mkstemp(suffix=".yaml", text=True)
    try:
        with os.fdopen(fd, "w") as f:
            f.write(hp_str)
        return HParam(path)
    finally:
        # Always clean up, even when writing or parsing raises.
        os.remove(path)


def load_hparam(filename):
    """Read a YAML file and flatten all of its documents into one dict.

    Every document in the file is expected to be a mapping. Top-level keys
    of later documents overwrite equal keys of earlier ones.

    Args:
        filename: Path of the YAML file to read.

    Returns:
        A plain ``dict`` holding the top-level keys of all documents.
    """
    hparam_dict = dict()
    # ``yaml.load_all`` is lazy, so the documents must be consumed while the
    # file is still open; the ``with`` block also guarantees the handle is
    # closed again.
    with open(filename, "r") as stream:
        # NOTE(security): ``yaml.Loader`` can construct arbitrary Python
        # objects. Only load trusted config files; consider
        # ``yaml.SafeLoader`` if configs may come from untrusted sources.
        for doc in yaml.load_all(stream, Loader=yaml.Loader):
            if doc is None:
                # Empty document (e.g. a stray ``---``): nothing to merge.
                continue
            for k, v in doc.items():
                hparam_dict[k] = v
    return hparam_dict


def merge_dict(user, default):
    """Recursively fill ``user`` with entries from ``default``.

    Keys missing from ``user`` are copied over from ``default``; keys present
    in both are merged recursively. ``user`` is modified in place and also
    returned. If either argument is not a dict, ``user`` is returned
    unchanged.
    """
    both_are_dicts = isinstance(user, dict) and isinstance(default, dict)
    if not both_are_dicts:
        return user

    for key, fallback in default.items():
        # Existing user values win; nested dicts are merged one level deeper.
        user[key] = merge_dict(user[key], fallback) if key in user else fallback
    return user


class _DotdictKeyError(AttributeError, KeyError):
    """Raised when attribute-style access hits a key that does not exist.

    It is an ``AttributeError`` so that ``hasattr``, ``getattr`` with a
    default, and ``copy.deepcopy`` behave normally, and also a ``KeyError``
    so that callers catching ``KeyError`` keep working.
    """


class Dotdict(dict):
    """
    a dictionary that supports dot notation
    as well as dictionary access notation
    usage: d = Dotdict() or d = Dotdict({'val1':'first'})
    set attributes: d.val2 = 'second' or d['val2'] = 'second'
    get attributes: d.val2 or d['val2']

    Nested mappings passed to the constructor are converted to ``Dotdict``
    as well, so ``d.outer.inner`` works.
    """

    def __getattr__(self, name):
        # Only called when normal attribute lookup fails, so dict methods
        # such as ``keys`` are unaffected.
        try:
            return self[name]
        except KeyError:
            raise _DotdictKeyError(name) from None

    __setattr__ = dict.__setitem__

    def __delattr__(self, name):
        try:
            del self[name]
        except KeyError:
            raise _DotdictKeyError(name) from None

    def __init__(self, dct=None):
        """Populate the dict from ``dct`` (any mapping), or leave it empty."""
        dct = dict() if not dct else dct
        for key, value in dct.items():
            # Anything mapping-like is wrapped so dot access works at depth.
            if hasattr(value, "keys"):
                value = Dotdict(value)
            self[key] = value


class HParam(Dotdict):
    """Hyperparameters loaded from a YAML file, with dot-notation access.

    The file is read with ``load_hparam`` and nested mappings are wrapped
    in ``Dotdict``. Attribute-style get/set/delete is inherited from
    ``Dotdict``, so it is not re-declared here.
    """

    def __init__(self, file):
        # Initialise the underlying dict directly; ``Dotdict.__init__`` is
        # bypassed because the content comes from the file, not a mapping.
        dict.__init__(self)
        for name, value in Dotdict(load_hparam(file)).items():
            self[name] = value
95 changes: 0 additions & 95 deletions hparams.py

This file was deleted.

78 changes: 46 additions & 32 deletions inference.ipynb

Large diffs are not rendered by default.

1 change: 0 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
matplotlib==2.1.0
tensorflow==1.15.2
numpy==1.13.3
inflect==0.2.5
librosa==0.6.0

@sabard sabard Dec 20, 2022

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The `pyyaml` package also needs to be listed here, since `config/hparams.py` imports `yaml`.

Expand Down
8 changes: 5 additions & 3 deletions train.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from data_utils import TextMelLoader, TextMelCollate
from loss_function import Tacotron2Loss
from logger import Tacotron2Logger
from hparams import create_hparams
from config.hparams import HParam


def reduce_tensor(tensor, n_gpus):
Expand Down Expand Up @@ -272,10 +272,12 @@ def train(output_directory, log_directory, checkpoint_path, warm_start, n_gpus,
parser.add_argument('--group_name', type=str, default='group_name',
required=False, help='Distributed group name')
parser.add_argument('--hparams', type=str,
required=False, help='comma separated name=value pairs')
required=True, help='path to the yaml config file ')

args = parser.parse_args()
hparams = create_hparams(args.hparams)
hparams = HParam(args.hparams)
with open(args.hparams, "r") as f:
hp_str = "".join(f.readlines())

torch.backends.cudnn.enabled = hparams.cudnn_enabled
torch.backends.cudnn.benchmark = hparams.cudnn_benchmark
Expand Down