shaunxsyang committed on
Commit
cc3d30b
·
verified ·
1 Parent(s): dba98db

Upload 5Hz_16rvq_c4096_config.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. 5Hz_16rvq_c4096_config.json +172 -0
5Hz_16rvq_c4096_config.json ADDED
@@ -0,0 +1,172 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer_type": "TokenizerGanArkTrainer",
3
+ "synthesizer_type": "TokenizerSynthesizer",
4
+ "model": {
5
+ "type": "TokenizerGANWrapper",
6
+ "tokenizer_type": "mimicodec_vq",
7
+ "tokenizer_conf": {
8
+ "seanet_conf": {
9
+ "channels": 1,
10
+ "dimension": 512,
11
+ "causal": false,
12
+ "n_filters": 64,
13
+ "n_residual_layers": 1,
14
+ "activation": "ELU",
15
+ "compress": 2,
16
+ "dilation_base": 2,
17
+ "disable_norm_outer_blocks": 0,
18
+ "kernel_size": 7,
19
+ "residual_kernel_size": 3,
20
+ "last_kernel_size": 3,
21
+ "norm": "none",
22
+ "pad_mode": "constant",
23
+ "ratios": [
24
+ 8,
25
+ 5,
26
+ 5,
27
+ 4,
28
+ 4
29
+ ],
30
+ "true_skip": true
31
+ },
32
+ "transformer_conf": {
33
+ "d_model": 512,
34
+ "num_heads": 8,
35
+ "num_layers": 8,
36
+ "causal": false,
37
+ "layer_scale": 0.01,
38
+ "context": 1000,
39
+ "conv_layout": true,
40
+ "max_period": 10000,
41
+ "gating": "none",
42
+ "norm": "layer_norm",
43
+ "positional_embedding": "rope",
44
+ "dim_feedforward": 2048
45
+ },
46
+ "vq_conf": {
47
+ "num_quantizers": 32,
48
+ "dim": 512,
49
+ "codebook_size": 256,
50
+ "codebook_dim": 8,
51
+ "threshold_ema_dead_code": 2,
52
+ "commitment": 0.25,
53
+ "weight_init": false,
54
+ "full_commit_loss": false
55
+ },
56
+ "ds_rate": 1,
57
+ "sample_rate": 16000
58
+ },
59
+ "wave_decoder_type": "mimicodec_dec",
60
+ "wave_decoder_conf": {
61
+ "in_channels": 512,
62
+ "seanet_conf": {
63
+ "channels": 1,
64
+ "dimension": 512,
65
+ "causal": false,
66
+ "n_filters": 64,
67
+ "n_residual_layers": 1,
68
+ "activation": "ELU",
69
+ "compress": 2,
70
+ "dilation_base": 2,
71
+ "disable_norm_outer_blocks": 0,
72
+ "kernel_size": 7,
73
+ "residual_kernel_size": 3,
74
+ "last_kernel_size": 3,
75
+ "norm": "none",
76
+ "pad_mode": "constant",
77
+ "ratios": [
78
+ 8,
79
+ 5,
80
+ 5,
81
+ 4,
82
+ 4
83
+ ],
84
+ "true_skip": true
85
+ },
86
+ "transformer_conf": {
87
+ "d_model": 512,
88
+ "num_heads": 8,
89
+ "num_layers": 8,
90
+ "causal": false,
91
+ "layer_scale": 0.01,
92
+ "context": 1000,
93
+ "conv_layout": true,
94
+ "max_period": 10000,
95
+ "gating": "none",
96
+ "norm": "layer_norm",
97
+ "positional_embedding": "rope",
98
+ "dim_feedforward": 2048
99
+ },
100
+ "up_rate": 1
101
+ },
102
+ "wav_input_sr": 16000,
103
+ "discriminators": {
104
+ "type": [
105
+ "HiFiGANMultiPeriodDiscriminator",
106
+ "SpecDiscriminator"
107
+ ],
108
+ "HiFiGANMultiPeriodDiscriminator": {
109
+ "period_sizes": [
110
+ 2,
111
+ 3,
112
+ 5,
113
+ 7,
114
+ 11
115
+ ],
116
+ "max_downsample_channels": 512,
117
+ "channels": 16,
118
+ "channel_increasing_factor": 4
119
+ },
120
+ "SpecDiscriminator": {
121
+ "stft_params": {
122
+ "fft_sizes": [
123
+ 78,
124
+ 126,
125
+ 206,
126
+ 334,
127
+ 542,
128
+ 876,
129
+ 1418,
130
+ 2296
131
+ ],
132
+ "hop_sizes": [
133
+ 39,
134
+ 63,
135
+ 103,
136
+ 167,
137
+ 271,
138
+ 438,
139
+ 709,
140
+ 1148
141
+ ],
142
+ "win_lengths": [
143
+ 78,
144
+ 126,
145
+ 206,
146
+ 334,
147
+ 542,
148
+ 876,
149
+ 1418,
150
+ 2296
151
+ ],
152
+ "window": "hann_window"
153
+ },
154
+ "in_channels": 1,
155
+ "out_channels": 1,
156
+ "kernel_sizes": [
157
+ 5,
158
+ 3
159
+ ],
160
+ "channels": 32,
161
+ "max_downsample_channels": 512,
162
+ "downsample_scales": [
163
+ 2,
164
+ 2,
165
+ 2
166
+ ],
167
+ "use_weight_norm": true
168
+ }
169
+ }
170
+ }
171
+
172
+ }