codefuse-admin's picture
upload model weights files
e6067cc verified
{
"activation_dropout": 0.0,
"attention_dropout": 0.0,
"attn_cfg": {
"ffn_expand_ratio": 1.3333333333333333,
"num_heads": null,
"num_heads_k": null,
"num_heads_v": null,
"rotary_emb_dim": -1,
"rotary_emb_scale_base": null,
"window_size": 128
},
"d_model": 2048,
"dropout": 0.0,
"initializer_range": 0.02,
"max_position_embeddings": 2048,
"mixer_cfg": {
"expand_ratio": 2.0,
"input_gate_low_rank": "auto",
"mem_size": 64,
"mode": "fused_chunk"
},
"model_type": "rodimus",
"n_layer": 48,
"norm_epsilon": 1e-05,
"residual_in_fp32": true,
"tie_word_embeddings": true,
"transformers_version": "4.37.2",
"use_cache": true,
"use_fast_path": true,
"vocab_size": 126464,
"eos_token_id": 126081,
"bos_token_id": 126080,
"pad_token_id": null,
"block_type": "rodimus",
"auto_map": {
"AutoConfig": "configuration_rodimus.RodimusConfig",
"AutoModel": "modeling_rodimus.RodimusForCausalLM",
"AutoModelForCausalLM": "modeling_rodimus.RodimusForCausalLM"
},
"torch_dtype": "float16",
"architectures": [
"RodimusForCausalLM"
]
}