ParsBench Leaderboard

This leaderboard is created using the ParsBench benchmarking framework.

In this leaderboard, we evaluate open-weight LLMs on Persian (Farsi) language tasks.

Note: We've included evaluations of the GPT and Claude 3.5 Sonnet models only to allow a better comparison between the open LLMs and SoTA models.

{
  • "headers": [
    • "T",
    • "Model",
    • "Average ⬆️",
    • "Persian MMLU",
    • "Persian Math",
    • "ParsiNLU Entailment",
    • "FarsTail Entailment",
    • "ParsiNLU Machine Translation Fa-En",
    • "ParsiNLU Machine Translation En-Fa",
    • "ParsiNLU Reading Comprehension",
    • "Persian News Summary",
    • "Type",
    • "Architecture",
    • "Precision",
    • "Hub License",
    • "#Params (B)",
    • "Hub ❤️",
    • "Available on the hub",
    • "Model sha"
    ],
  • "data": [
    • [
      • "🟢",
      • "<a target="_blank" href="https://huggingface.co/gpt-4o" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">gpt-4o</a>",
      • 58.49,
      • 31,
      • 82.3,
      • 77.1,
      • 89.3,
      • 43.1,
      • 34.9,
      • 75.2,
      • 35,
      • "pretrained",
      • "?",
      • "?",
      • "custom",
      • 0.1,
      • 0,
      • false,
      • ""
      ],
    • [
      • "🟢",
      • "<a target="_blank" href="https://huggingface.co/gpt-4-turbo" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">gpt-4-turbo</a>",
      • 55.31,
      • 13.5,
      • 81.1,
      • 75,
      • 82.8,
      • 39.9,
      • 37.7,
      • 77.7,
      • 34.8,
      • "pretrained",
      • "?",
      • "?",
      • "custom",
      • 0.1,
      • 0,
      • false,
      • ""
      ],
    • [
      • "🟢",
      • "<a target="_blank" href="https://huggingface.co/gpt-4o-mini" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">gpt-4o-mini</a>",
      • 55.04,
      • 24,
      • 78.1,
      • 73.3,
      • 85,
      • 38.9,
      • 34.1,
      • 73.4,
      • 33.5,
      • "pretrained",
      • "?",
      • "?",
      • "custom",
      • 0.1,
      • 0,
      • false,
      • ""
      ],
    • [
      • "🟢",
      • "<a target="_blank" href="https://huggingface.co/claude-3-5-sonnet-20240620" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">claude-3-5-sonnet-20240620</a>",
      • 54.71,
      • 50.5,
      • 85.1,
      • 85.1,
      • 94.4,
      • 18.1,
      • 21.6,
      • 49.6,
      • 33.3,
      • "pretrained",
      • "?",
      • "?",
      • "custom",
      • 0.1,
      • 0,
      • false,
      • ""
      ],
    • [
      • "🟢",
      • "<a target="_blank" href="https://huggingface.co/JabirLLM-400B" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">JabirLLM-400B</a>",
      • 54.21,
      • 29,
      • 52,
      • 67.5,
      • 89.7,
      • 40.5,
      • 36.6,
      • 77.7,
      • 40.7,
      • "pretrained",
      • "?",
      • "?",
      • "custom",
      • 0.1,
      • 0,
      • false,
      • ""
      ],
    • [
      • "🟢",
      • "<a target="_blank" href="https://huggingface.co/Qwen/Qwen2-72B-Instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">Qwen/Qwen2-72B-Instruct</a>",
      • 52.32,
      • 28.5,
      • 73.7,
      • 79.3,
      • 83.3,
      • 30.8,
      • 23.6,
      • 69.4,
      • 30,
      • "pretrained",
      • "?",
      • "?",
      • "custom",
      • 0.1,
      • 0,
      • false,
      • ""
      ],
    • [
      • "🟢",
      • "<a target="_blank" href="https://huggingface.co/google/gemma2-9b-it" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">google/gemma2-9b-it</a>",
      • 51.82,
      • 30.5,
      • 70.2,
      • 60.9,
      • 78.7,
      • 30,
      • 30.4,
      • 77.9,
      • 36,
      • "pretrained",
      • "?",
      • "?",
      • "custom",
      • 0.1,
      • 0,
      • false,
      • ""
      ],
    • [
      • "?",
      • "<a target="_blank" href="https://huggingface.co/meta-llama/Meta-Llama-3-70B-Instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">meta-llama/Meta-Llama-3-70B-Instruct</a>",
      • 50.78,
      • 37,
      • 68,
      • 54.2,
      • 73.8,
      • 25.4,
      • 32.6,
      • 79.4,
      • 35.8,
      • "",
      • "?",
      • "?",
      • "?",
      • 0,
      • 0,
      • false,
      • ""
      ],
    • [
      • "🟢",
      • "<a target="_blank" href="https://huggingface.co/Qwen/Qwen2-7B-Instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">Qwen/Qwen2-7B-Instruct</a>",
      • 42.99,
      • 39.5,
      • 60,
      • 54.4,
      • 44.6,
      • 31.6,
      • 22.9,
      • 63.9,
      • 27,
      • "pretrained",
      • "?",
      • "?",
      • "custom",
      • 0.1,
      • 0,
      • false,
      • ""
      ],
    • [
      • "🟢",
      • "<a target="_blank" href="https://huggingface.co/gpt-3.5-turbo" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">gpt-3.5-turbo</a>",
      • 39,
      • 3.5,
      • 58.9,
      • 43.2,
      • 36.6,
      • 36,
      • 34.3,
      • 68.1,
      • 31.4,
      • "pretrained",
      • "?",
      • "?",
      • "custom",
      • 0.1,
      • 0,
      • false,
      • ""
      ],
    • [
      • "🟢",
      • "<a target="_blank" href="https://huggingface.co/CohereForAI/aya-23-8B" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">CohereForAI/aya-23-8B</a>",
      • 38.78,
      • 28.5,
      • 36,
      • 39.5,
      • 46.7,
      • 35,
      • 31.8,
      • 67.5,
      • 25.2,
      • "pretrained",
      • "?",
      • "?",
      • "custom",
      • 0.1,
      • 0,
      • false,
      • ""
      ],
    • [
      • "🟢",
      • "<a target="_blank" href="https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">meta-llama/Meta-Llama-3.1-8B-Instruct</a>",
      • 38.39,
      • 3.5,
      • 53.7,
      • 46.2,
      • 40.9,
      • 29,
      • 30.1,
      • 73.4,
      • 30.3,
      • "pretrained",
      • "?",
      • "?",
      • "custom",
      • 0.1,
      • 0,
      • false,
      • ""
      ],
    • [
      • "🟢",
      • "<a target="_blank" href="https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">meta-llama/Meta-Llama-3-8B-Instruct</a>",
      • 35.78,
      • 21.5,
      • 49.7,
      • 48.1,
      • 34.2,
      • 25.1,
      • 28.2,
      • 70.4,
      • 9,
      • "pretrained",
      • "?",
      • "?",
      • "custom",
      • 0.1,
      • 0,
      • false,
      • ""
      ],
    • [
      • "🟢",
      • "<a target="_blank" href="https://huggingface.co/PartAI/Dorna-Llama3-8B-Instruct" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">PartAI/Dorna-Llama3-8B-Instruct</a>",
      • 35.35,
      • 23.5,
      • 42.3,
      • 41.1,
      • 40.8,
      • 24.6,
      • 26.5,
      • 61,
      • 23,
      • "pretrained",
      • "?",
      • "?",
      • "custom",
      • 0.1,
      • 0,
      • false,
      • ""
      ],
    • [
      • "🟢",
      • "<a target="_blank" href="https://huggingface.co/MehdiHosseiniMoghadam/AVA-Llama-3-V2" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">MehdiHosseiniMoghadam/AVA-Llama-3-V2</a>",
      • 30.84,
      • 22.5,
      • 43.4,
      • 42.8,
      • 21.7,
      • 5.9,
      • 26,
      • 56.9,
      • 27.5,
      • "pretrained",
      • "?",
      • "?",
      • "custom",
      • 0.1,
      • 0,
      • false,
      • ""
      ],
    • [
      • "🟢",
      • "<a target="_blank" href="https://huggingface.co/universitytehran/PersianMind-v1.0" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">universitytehran/PersianMind-v1.0</a>",
      • 26.44,
      • 0,
      • 37.6,
      • 1.1,
      • 31.3,
      • 26,
      • 29.6,
      • 50,
      • 35.9,
      • "pretrained",
      • "?",
      • "?",
      • "custom",
      • 0.1,
      • 0,
      • false,
      • ""
      ]
    ],
  • "metadata": null
}