{
  "affine_momentum": 0.9,
  "architectures": [
    "NeoLLMForCausalLM"
  ],
  "attention_bias": false,
  "attention_dropout": 0.1,
  "attn_res_num_blocks": 4,
  "auto_map": {
    "AutoConfig": "configuration_neollm.NeoLLMConfig",
    "AutoModel": "modeling_neollm.NeoLLMModel",
    "AutoModelForCausalLM": "modeling_neollm.NeoLLMForCausalLM"
  },
  "bos_token_id": 1,
  "directional_routing_k": 4,
  "directional_routing_temp": 3.0,
  "dropout_rate": 0.1,
  "dtype": "bfloat16",
  "eos_token_id": 7,
  "fan_ratio": 0.125,
  "fan_ratio_ffn": 0.0625,
  "generator_d_seed": 128,
  "generator_k": 3,
  "generator_krank": 64,
  "generator_num_knots": 32,
  "generator_num_modes": 8,
  "generator_spline_degree": 2,
  "head_dim": 64,
  "hidden_act": "xielu",
  "hidden_size": 512,
  "iha_local_global_pattern": "LLLLG",
  "iha_num_pseudo_heads": 2,
  "iha_sliding_window": null,
  "initializer_range": 0.02,
  "intermediate_size": 1536,
  "jtokm_aux_loss_weight": 0.0001,
  "jtokm_norm_eps": 1e-06,
  "jtokm_num_experts": 4,
  "jtokm_num_modes": 4,
  "jtokm_top_k": 2,
  "laurel_lr_rank": 32,
  "lucid_attention_eps": 1e-06,
  "max_position_embeddings": 512,
  "mea_component_key_value_heads": 4,
  "mea_groupnorm_eps": 1e-06,
  "model_type": "neollm",
  "momentum_gamma": 0.1,
  "num_attention_heads": 8,
  "num_hidden_layers": 12,
  "num_key_value_heads": 4,
  "num_mem_heads": 4,
  "pad_token_id": 0,
  "partial_rotary_factor": 0.25,
  "polynorm_exclusive": false,
  "repo_d_p": 64,
  "repo_goat_num_frequencies": 3,
  "repo_goat_sink_decay": 4.0,
  "repo_start_layer": 4,
  "rms_norm_eps": 1e-06,
  "rope_parameters": {
    "partial_rotary_factor": 0.25,
    "rope_theta": 10000.0,
    "rope_type": "default"
  },
  "rope_theta": 10000.0,
  "siamese_attn_x_scale_init": 1.0,
  "siamese_depth_scaling": true,
  "siamese_normalized_input": true,
  "stack_d_model": 32,
  "stack_memory_cache_size": 2048,
  "stack_slots": 16,
  "tie_word_embeddings": false,
  "transformers_version": "5.5.3",
  "use_affine_scaled_attention": false,
  "use_attn_res": false,
  "use_cache": false,
  "use_directional_routing": false,
  "use_embedding_input_norm": true,
  "use_embedding_multipliers": false,
  "use_fan_residual": false,
  "use_gpas": false,
  "use_hadamard_o_proj": true,
  "use_iha": true,
  "use_jtokm": false,
  "use_laurel": false,
  "use_laurel_lr": false,
  "use_laurel_rw": false,
  "use_learnable_multipliers": true,
  "use_lns": false,
  "use_lucid_attention": false,
  "use_mea_attention": false,
  "use_momentum_attention": true,
  "use_repo": true,
  "use_repo_goat_prior": false,
  "use_repo_grape": true,
  "use_siamesenorm": true,
  "use_spelling_bee_embeddings": true,
  "use_stack_memory": false,
  "use_token_generator": false,
  "use_xsa": true,
  "vocab_size": 64402,
  "xsa_eps": 1e-06
}