{
  "env_cls": "qiskit_gym.envs.synthesis.PermutationEnv",
  "env": {
    "num_qubits": 12,
    "difficulty": 1,
    "gateset": [
      [
        "SWAP",
        [
          0,
          1
        ]
      ],
      [
        "SWAP",
        [
          0,
          11
        ]
      ],
      [
        "SWAP",
        [
          1,
          2
        ]
      ],
      [
        "SWAP",
        [
          2,
          3
        ]
      ],
      [
        "SWAP",
        [
          3,
          4
        ]
      ],
      [
        "SWAP",
        [
          4,
          5
        ]
      ],
      [
        "SWAP",
        [
          5,
          6
        ]
      ],
      [
        "SWAP",
        [
          6,
          7
        ]
      ],
      [
        "SWAP",
        [
          7,
          8
        ]
      ],
      [
        "SWAP",
        [
          8,
          9
        ]
      ],
      [
        "SWAP",
        [
          9,
          10
        ]
      ],
      [
        "SWAP",
        [
          10,
          11
        ]
      ]
    ],
    "depth_slope": 2,
    "max_depth": 256,
    "metrics_weights": {
      "n_cnots": 0.01,
      "n_layers_cnots": 0.01,
      "n_layers": 0.01,
      "n_gates": 0.01
    },
    "add_perms": false
  },
  "policy_cls": "twisterl.nn.BasicPolicy",
  "policy": {
    "embedding_size": 512,
    "common_layers": [
      256
    ],
    "policy_layers": [],
    "value_layers": []
  },
  "algorithm_cls": "twisterl.rl.PPO",
  "algorithm": {
    "collecting": {
      "num_cores": 32,
      "num_episodes": 1024,
      "lambda": 0.995,
      "gamma": 0.995
    },
    "training": {
      "num_epochs": 10,
      "vf_coef": 0.8,
      "ent_coef": 0.01,
      "clip_ratio": 0.1,
      "normalize_advantage": false
    },
    "learning": {
      "diff_threshold": 0.85,
      "diff_max": 512,
      "diff_metric": "ppo_deterministic"
    },
    "optimizer": {
      "lr": 0.0003
    },
    "evals": {
      "ppo_deterministic": {
        "num_episodes": 100,
        "deterministic": true,
        "num_searches": 1,
        "num_mcts_searches": 0,
        "num_cores": 32,
        "C": 1.41
      },
      "ppo_10": {
        "num_episodes": 100,
        "deterministic": false,
        "num_searches": 10,
        "num_mcts_searches": 0,
        "num_cores": 32,
        "C": 1.41
      }
    },
    "logging": {
      "log_freq": 1,
      "checkpoint_freq": 10
    }
  }
}