| from transformers import PretrainedConfig | |
class MinGRULMConfig(PretrainedConfig):
    """Configuration container for the ``"mingru"`` model type.

    Every constructor argument is stored verbatim as an instance attribute
    of the same name; no validation or derivation is performed here. Any
    additional keyword arguments are forwarded untouched to
    ``PretrainedConfig.__init__``.

    Args:
        vocab_size: Stored as ``self.vocab_size`` (default ``50257``).
        d_model: Stored as ``self.d_model`` (default ``512``).
        ff_mult: Stored as ``self.ff_mult`` (default ``4``).
        min_gru_expansion: Stored as ``self.min_gru_expansion``
            (default ``1.5``).
        expand: Stored as ``self.expand`` (default ``2.0``).
        depth: Stored as ``self.depth`` (default ``12``).
        n_layer: Stored as ``self.n_layer`` (default ``12``).
        pad_vocab_size_multiple: Stored as ``self.pad_vocab_size_multiple``
            (default ``8``).
        initializer_range: Stored as ``self.initializer_range``
            (default ``0.02``).
        hidden_size: Stored as ``self.hidden_size`` (default ``512``).
        **kwargs: Passed through to the parent class constructor.

    Note:
        ``d_model`` and ``hidden_size`` are kept as independent values, as
        are ``depth`` and ``n_layer``; setting one does not update the
        other. NOTE(review): these pairs presumably describe the same
        quantities (model width / number of layers) -- confirm against the
        model code that consumes this config.
    """

    model_type = "mingru"

    def __init__(
        self,
        vocab_size=50257,
        d_model=512,
        ff_mult=4,
        min_gru_expansion=1.5,
        expand=2.0,
        depth=12,
        n_layer=12,
        pad_vocab_size_multiple=8,
        initializer_range=0.02,
        hidden_size=512,
        **kwargs,
    ):
        # Vocabulary and width settings.
        self.vocab_size = vocab_size
        self.d_model = d_model

        # Expansion / multiplier factors.
        self.ff_mult = ff_mult
        self.min_gru_expansion = min_gru_expansion
        self.expand = expand

        # Layer counts and the separately tracked hidden size.
        self.depth = depth
        self.n_layer = n_layer
        self.hidden_size = hidden_size

        # Vocabulary padding and weight-initialisation settings.
        self.pad_vocab_size_multiple = pad_vocab_size_multiple
        self.initializer_range = initializer_range

        # The parent constructor runs last, after the model-specific
        # attributes above are in place; it receives only the extra kwargs.
        super().__init__(**kwargs)