{ "architectures": [ "Transformer" ], "bias": true, "d_hidden": 2048, "d_model": 512, "mask": "causal", "n_ctx": 512, "n_head": 8, "n_layer": 6, "norm": true, "tokenizer": "simple", "torch_dtype": "float32", "transformers_version": "4.51.1" }