ParsBench Leaderboard

This leaderboard is created using the ParsBench framework, a benchmarking toolkit.

In this leaderboard, we evaluate open-weight LLMs on Persian (Farsi) language tasks.

Note: We've added evaluations of the GPT and Claude 3.5 Sonnet models only to provide a better comparison between the open LLMs and SoTA models.

{

"headers": [
- "T",
- "Model",
- "Average ⬆️",
- "Persian MMLU",
- "Persian Math",
- "ParsiNLU Entailment",
- "FarsTail Entailment",
- "ParsiNLU Machine Translation Fa-En",
- "ParsiNLU Machine Translation En-Fa",
- "ParsiNLU Reading Comprehension",
- "Persian News Summary",
- "Type",
- "Architecture",
- "Precision",
- "Hub License",
- "#Params (B)",
- "Hub ❤️",
- "Available on the hub",
- "Model sha"
],
"data": [
- [
  - "🟢",
  - "<a target="_blank" href="https://huggingface.co/gpt-4o" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">gpt-4o</a>",
  - 58.49,
  - 31,
  - 82.3,
  - 77.1,
  - 89.3,
  - 43.1,
  - 34.9,
  - 75.2,
  - 35,
  - "pretrained",
  - "?",
  - "?",
  - "custom",
  - 0.1,
  - 0,
  - false,
  - ""
  ],
- [
  - "🟢",
  - "<a target="_blank" href="https://huggingface.co/gpt-4-turbo" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">gpt-4-turbo</a>",
  - 55.31,
  - 13.5,
  - 81.1,
  - 75,
  - 82.8,
  - 39.9,
  - 37.7,
  - 77.7,
  - 34.8,
  - "pretrained",
  - "?",
  - "?",
  - "custom",
  - 0.1,
  - 0,
  - false,
  - ""
  ],
- [
  - "🟢",
  - "<a target="_blank" href="https://huggingface.co/gpt-4o-mini" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">gpt-4o-mini</a>",
  - 55.04,
  - 24,
  - 78.1,
  - 73.3,
  - 85,
  - 38.9,
  - 34.1,
  - 73.4,
  - 33.5,
  - "pretrained",
  - "?",
  - "?",
  - "custom",
  - 0.1,
  - 0,
  - false,
  - ""
  ],
- [
  - "🟢",
  - "<a target="_blank" href="https://huggingface.co/claude-3-5-sonnet-20240620" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">claude-3-5-sonnet-20240620</a>",
  - 54.71,
  - 50.5,
  - 85.1,
  - 85.1,
  - 94.4,
  - 18.1,
  - 21.6,
  - 49.6,
  - 33.3,
  - "pretrained",
  - "?",
  - "?",
  - "custom",
  - 0.1,
  - 0,
  - false,
  - ""
  ],
- [
  - "🟢",
  - "<a target="_blank" href="https://huggingface.co/JabirLLM-400B" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">JabirLLM-400B</a>",
  - 54.21,
  - 29,
  - 52,
  - 67.5,
  - 89.7,
  - 40.5,
  - 36.6,
  - 77.7,
  - 40.7,
  - "pretrained",
  - "?",
  - "?",
  - "custom",
  - 0.1,
  - 0,
  - false,
  - ""
  ],
- [
  - "🟢",
  - "<a target="_blank" href="https://huggingface.co/Qwen/Qwen2-72B-Instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">Qwen/Qwen2-72B-Instruct</a>",
  - 52.32,
  - 28.5,
  - 73.7,
  - 79.3,
  - 83.3,
  - 30.8,
  - 23.6,
  - 69.4,
  - 30,
  - "pretrained",
  - "?",
  - "?",
  - "custom",
  - 0.1,
  - 0,
  - false,
  - ""
  ],
- [
  - "🟢",
  - "<a target="_blank" href="https://huggingface.co/google/gemma2-9b-it" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">google/gemma2-9b-it</a>",
  - 51.82,
  - 30.5,
  - 70.2,
  - 60.9,
  - 78.7,
  - 30,
  - 30.4,
  - 77.9,
  - 36,
  - "pretrained",
  - "?",
  - "?",
  - "custom",
  - 0.1,
  - 0,
  - false,
  - ""
  ],
- [
  - "?",
  - "<a target="_blank" href="https://huggingface.co/meta-llama/Meta-Llama-3-70B-Instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">meta-llama/Meta-Llama-3-70B-Instruct</a>",
  - 50.78,
  - 37,
  - 68,
  - 54.2,
  - 73.8,
  - 25.4,
  - 32.6,
  - 79.4,
  - 35.8,
  - "",
  - "?",
  - "?",
  - "?",
  - 0,
  - 0,
  - false,
  - ""
  ],
- [
  - "🟢",
  - "<a target="_blank" href="https://huggingface.co/Qwen/Qwen2-7B-Instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">Qwen/Qwen2-7B-Instruct</a>",
  - 42.99,
  - 39.5,
  - 60,
  - 54.4,
  - 44.6,
  - 31.6,
  - 22.9,
  - 63.9,
  - 27,
  - "pretrained",
  - "?",
  - "?",
  - "custom",
  - 0.1,
  - 0,
  - false,
  - ""
  ],
- [
  - "🟢",
  - "<a target="_blank" href="https://huggingface.co/gpt-3.5-turbo" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">gpt-3.5-turbo</a>",
  - 39,
  - 3.5,
  - 58.9,
  - 43.2,
  - 36.6,
  - 36,
  - 34.3,
  - 68.1,
  - 31.4,
  - "pretrained",
  - "?",
  - "?",
  - "custom",
  - 0.1,
  - 0,
  - false,
  - ""
  ],
- [
  - "🟢",
  - "<a target="_blank" href="https://huggingface.co/CohereForAI/aya-23-8B" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">CohereForAI/aya-23-8B</a>",
  - 38.78,
  - 28.5,
  - 36,
  - 39.5,
  - 46.7,
  - 35,
  - 31.8,
  - 67.5,
  - 25.2,
  - "pretrained",
  - "?",
  - "?",
  - "custom",
  - 0.1,
  - 0,
  - false,
  - ""
  ],
- [
  - "🟢",
  - "<a target="_blank" href="https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">meta-llama/Meta-Llama-3.1-8B-Instruct</a>",
  - 38.39,
  - 3.5,
  - 53.7,
  - 46.2,
  - 40.9,
  - 29,
  - 30.1,
  - 73.4,
  - 30.3,
  - "pretrained",
  - "?",
  - "?",
  - "custom",
  - 0.1,
  - 0,
  - false,
  - ""
  ],
- [
  - "🟢",
  - "<a target="_blank" href="https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">meta-llama/Meta-Llama-3-8B-Instruct</a>",
  - 35.78,
  - 21.5,
  - 49.7,
  - 48.1,
  - 34.2,
  - 25.1,
  - 28.2,
  - 70.4,
  - 9,
  - "pretrained",
  - "?",
  - "?",
  - "custom",
  - 0.1,
  - 0,
  - false,
  - ""
  ],
- [
  - "🟢",
  - "<a target="_blank" href="https://huggingface.co/PartAI/Dorna-Llama3-8B-Instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">PartAI/Dorna-Llama3-8B-Instruct</a>",
  - 35.35,
  - 23.5,
  - 42.3,
  - 41.1,
  - 40.8,
  - 24.6,
  - 26.5,
  - 61,
  - 23,
  - "pretrained",
  - "?",
  - "?",
  - "custom",
  - 0.1,
  - 0,
  - false,
  - ""
  ],
- [
  - "🟢",
  - "<a target="_blank" href="https://huggingface.co/MehdiHosseiniMoghadam/AVA-Llama-3-V2" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">MehdiHosseiniMoghadam/AVA-Llama-3-V2</a>",
  - 30.84,
  - 22.5,
  - 43.4,
  - 42.8,
  - 21.7,
  - 5.9,
  - 26,
  - 56.9,
  - 27.5,
  - "pretrained",
  - "?",
  - "?",
  - "custom",
  - 0.1,
  - 0,
  - false,
  - ""
  ],
- [
  - "🟢",
  - "<a target="_blank" href="https://huggingface.co/universitytehran/PersianMind-v1.0" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">universitytehran/PersianMind-v1.0</a>",
  - 26.44,
  - 0,
  - 37.6,
  - 1.1,
  - 31.3,
  - 26,
  - 29.6,
  - 50,
  - 35.9,
  - "pretrained",
  - "?",
  - "?",
  - "custom",
  - 0.1,
  - 0,
  - false,
  - ""
  ]
],
"metadata": null

}

model	revision	private	precision	weight_type	status
CohereForAI/aya-23-8B	main	false	?	Original	FINISHED
JabirLLM-400B	main	false	?	Original	FINISHED
MehdiHosseiniMoghadam/AVA-Llama-3-V2	main	false	?	Original	FINISHED
PartAI/Dorna-Llama3-8B-Instruct	main	false	?	Original	FINISHED
Qwen/Qwen2-72B-Instruct	main	false	?	Original	FINISHED
Qwen/Qwen2-7B-Instruct	main	false	?	Original	FINISHED
claude-3-5-sonnet-20240620	main	false	?	Original	FINISHED
google/gemma2-9b-it	main	false	?	Original	FINISHED
gpt-3.5-turbo	main	false	?	Original	FINISHED
gpt-4-turbo	main	false	?	Original	FINISHED
gpt-4o	main	false	?	Original	FINISHED
gpt-4o-mini	main	false	?	Original	FINISHED
meta-llama/Meta-Llama-3.1-8B-Instruct	main	false	?	Original	FINISHED
meta-llama/Meta-Llama-3-70B-Instruct	main	false	?	Original	FINISHED
meta-llama/Meta-Llama-3-8B-Instruct	main	false	?	Original	FINISHED
universitytehran/PersianMind-v1.0	main	false	?	Original	FINISHED

ParsBench Leaderboard

✉️✨ Submit your model here!