|
|
--- |
|
|
license: apache-2.0 |
|
|
language: |
|
|
- en |
|
|
pipeline_tag: text-generation |
|
|
tags: |
|
|
- miniGPT |
|
|
- Decoder |
|
|
- Transformer |
|
|
- nanoGPT |
|
|
- TextGeneration |
|
|
- 350mb |
|
|
- tiny |
|
|
- finetune |
|
|
- tinyGPT |
|
|
--- |
|
|
|
|
|
# MiniGPT (WikiText-103) |
|
|
|
|
|
This is a **MiniGPT** model built from scratch in PyTorch and trained on the WikiText-103 dataset. |
|
|
|
|
|
## Files |
|
|
- `mini_gpt_best.pt` → model checkpoint |
|
|
- `config.json` → model configuration |
|
|
- `vocab.json` → tokenizer vocabulary |
|
|
|
|
|
## Training |
|
|
- Epochs: 5 |
|
|
- Sequence length: 128 |
|
|
- Train PPL: 1.18 |
|
|
- Validation PPL: 1.17 |
|
|
|
|
|
## Usage |
|
|
```python |
|
|
import torch |
|
|
from model import MiniGPT # your model definition |
|
|
import json |
|
|
|
|
|
# load vocab |
|
|
with open("vocab.json") as f: |
|
|
    vocab = json.load(f) |
|
|
inv_vocab = {idx: word for word, idx in vocab.items()} |
|
|
|
|
|
# load model |
|
|
model = MiniGPT(**json.load(open("config.json"))) |
|
|
model.load_state_dict(torch.load("mini_gpt_best.pt")) |
|
|
model.eval() |
|
|
|
|
|
|
|
|
|