{
  "dataset_name": "c4",
  "max_seq_len": 64,
  "enc_dec_model": "facebook/bart-base",
  "train_batch_size": 64,
  "eval_batch_size": 32,
  "num_encoder_latents": 32,
  "num_decoder_latents": 32,
  "dim_ae": 64,
  "num_layers": 3,
  "l2_normalize_latents": true,
  "output_dir": "saved_latent_models/c4/2024-09-22_02-30-27",
  "save_dir": "saved_latent_models",
  "learning_rate": 0.0001,
  "num_train_steps": 50000,
  "lr_schedule": "linear",
  "lr_warmup_steps": 1000,
  "optimizer": "adamw",
  "adam_beta1": 0.9,
  "adam_beta2": 0.999,
  "adam_weight_decay": 0.01,
  "eval_every": 10000,
  "mixed_precision": "no",
  "wandb_name": "bart-roc-l2norm-test-32-64",
  "lm_mode": "freeze",
  "eval": false,
  "resume_training": false,
  "resume_dir": null,
  "num_devices": 1
}