| { | |
| "leaderboard": [ | |
| { | |
| "model": "GLM-Image", | |
| "link": "https://z.ai/blog/glm-image", | |
| "hf": "https://huggingface.co/zai-org/GLM-Image", | |
| "open_source": true, | |
| "release_date": "2026-01", | |
| "Overall": 79.11, | |
| "Style": 89.62, | |
| "World Knowledge": 93.35, | |
| "Attribute-Overall": 83.92, | |
| "Quantity": 79.26, | |
| "Expression": 73.89, | |
| "Material": 85.62, | |
| "Size": 88.83, | |
| "Shape": 76.92, | |
| "Color": 87.74, | |
| "Action-Overall": 71.78, | |
| "Hand": 78.21, | |
| "Full body": 71.25, | |
| "Animal": 73.19, | |
| "Non Contact": 66.07, | |
| "Contact": 62.07, | |
| "State": 74.21, | |
| "Relationship-Overall": 77.16, | |
| "Composition": 73.09, | |
| "Similarity": 73.72, | |
| "Inclusion": 85.34, | |
| "Comparison": 80.58, | |
| "Compound-Overall": 74.85, | |
| "Imagination": 76.38, | |
| "Feature matching": 71.5, | |
| "Grammar-Overall": 73.48, | |
| "Pronoun Reference": 86.9, | |
| "Consistency": 73.41, | |
| "Negation": 61.62, | |
| "Layout-Overall": 85.95, | |
| "2D": 87.15, | |
| "3D": 84.42, | |
| "Logical Reasoning": 58.09, | |
| "Text": 82.88 | |
| }, | |
| { | |
| "model": "FLUX-2-flex", | |
| "link": "https://flux2-flex.com/", | |
| "hf": "-", | |
| "open_source": false, | |
| "release_date": "2025-11", | |
| "Overall": 89.19, | |
| "Style": 98.33, | |
| "World Knowledge": 96.78, | |
| "Attribute-Overall": 95.71, | |
| "Quantity": 90.96, | |
| "Expression": 88.7, | |
| "Material": 96.89, | |
| "Size": 93.14, | |
| "Shape": 94.14, | |
| "Color": 99.15, | |
| "Action-Overall": 87.6, | |
| "Hand": 84.09, | |
| "Full body": 87.9, | |
| "Animal": 88.97, | |
| "Non Contact": 89.14, | |
| "Contact": 85.17, | |
| "State": 88.71, | |
| "Relationship-Overall": 92.84, | |
| "Composition": 91.56, | |
| "Similarity": 89.87, | |
| "Inclusion": 94.48, | |
| "Comparison": 96.12, | |
| "Compound-Overall": 92.11, | |
| "Imagination": 92.88, | |
| "Feature matching": 90.42, | |
| "Grammar-Overall": 86.98, | |
| "Pronoun Reference": 95.49, | |
| "Consistency": 84.92, | |
| "Negation": 81.43, | |
| "Layout-Overall": 95.03, | |
| "2D": 95.6, | |
| "3D": 94.29, | |
| "Logical Reasoning": 81.73, | |
| "Text": 64.8 | |
| }, | |
| { | |
| "model": "FLUX-2-max", | |
| "link": "https://bfl.ai/models/flux-2-max", | |
| "hf": "-", | |
| "open_source": false, | |
| "release_date": "2025-12", | |
| "Overall": 89.8, | |
| "Style": 99.25, | |
| "World Knowledge": 97.37, | |
| "Attribute-Overall": 96.12, | |
| "Quantity": 89.36, | |
| "Expression": 89.55, | |
| "Material": 96.97, | |
| "Size": 95.06, | |
| "Shape": 96.13, | |
| "Color": 98.95, | |
| "Action-Overall": 88.05, | |
| "Hand": 87.99, | |
| "Full body": 88.61, | |
| "Animal": 89.34, | |
| "Non Contact": 89.73, | |
| "Contact": 84.88, | |
| "State": 88.25, | |
| "Relationship-Overall": 94.54, | |
| "Composition": 94.26, | |
| "Similarity": 90.65, | |
| "Inclusion": 97.09, | |
| "Comparison": 95.87, | |
| "Compound-Overall": 93.21, | |
| "Imagination": 94.16, | |
| "Feature matching": 91.12, | |
| "Grammar-Overall": 90.72, | |
| "Pronoun Reference": 96.31, | |
| "Consistency": 90.87, | |
| "Negation": 85.71, | |
| "Layout-Overall": 94.97, | |
| "2D": 94.89, | |
| "3D": 95.07, | |
| "Logical Reasoning": 85.96, | |
| "Text": 57.78 | |
| }, | |
| { | |
| "model": "FLUX-2-pro", | |
| "link": "https://docs.bfl.ai/flux_2/flux2_text_to_image", | |
| "hf": "-", | |
| "open_source": false, | |
| "release_date": "2025-11", | |
| "Overall": 87.11, | |
| "Style": 98.83, | |
| "World Knowledge": 95.91, | |
| "Attribute-Overall": 94.66, | |
| "Quantity": 83.51, | |
| "Expression": 86.16, | |
| "Material": 97.27, | |
| "Size": 93.9, | |
| "Shape": 90.99, | |
| "Color": 98.42, | |
| "Action-Overall": 86.0, | |
| "Hand": 83.44, | |
| "Full body": 87.34, | |
| "Animal": 86.76, | |
| "Non Contact": 85.14, | |
| "Contact": 82.85, | |
| "State": 87.43, | |
| "Relationship-Overall": 92.42, | |
| "Composition": 91.71, | |
| "Similarity": 88.6, | |
| "Inclusion": 96.22, | |
| "Comparison": 93.45, | |
| "Compound-Overall": 91.96, | |
| "Imagination": 93.09, | |
| "Feature matching": 89.49, | |
| "Grammar-Overall": 86.47, | |
| "Pronoun Reference": 95.49, | |
| "Consistency": 83.73, | |
| "Negation": 81.07, | |
| "Layout-Overall": 93.12, | |
| "2D": 94.18, | |
| "3D": 91.76, | |
| "Logical Reasoning": 79.26, | |
| "Text": 52.5 | |
| }, | |
| { | |
| "model": "FLUX.2-klein-4b", | |
| "link": "https://bfl.ai/blog/flux2-klein-towards-interactive-visual-intelligence", | |
| "hf": "https://huggingface.co/black-forest-labs/FLUX.2-klein-4B", | |
| "open_source": true, | |
| "release_date": "2026-01", | |
| "Overall": 79.37, | |
| "Style": 98.67, | |
| "World Knowledge": 90.17, | |
| "Attribute-Overall": 93.14, | |
| "Quantity": 77.13, | |
| "Expression": 88.61, | |
| "Material": 95.26, | |
| "Size": 90.34, | |
| "Shape": 86.89, | |
| "Color": 97.59, | |
| "Action-Overall": 81.07, | |
| "Hand": 71.15, | |
| "Full body": 83.12, | |
| "Animal": 83.7, | |
| "Non Contact": 78.57, | |
| "Contact": 78.45, | |
| "State": 84.23, | |
| "Relationship-Overall": 88.2, | |
| "Composition": 88.78, | |
| "Similarity": 82.37, | |
| "Inclusion": 94.83, | |
| "Comparison": 85.92, | |
| "Compound-Overall": 87.03, | |
| "Imagination": 87.08, | |
| "Feature matching": 86.92, | |
| "Grammar-Overall": 85.03, | |
| "Pronoun Reference": 92.06, | |
| "Consistency": 82.94, | |
| "Negation": 80.63, | |
| "Layout-Overall": 90.71, | |
| "2D": 90.82, | |
| "3D": 90.58, | |
| "Logical Reasoning": 77.21, | |
| "Text": 2.45 | |
| }, | |
| { | |
| "model": "FLUX.2-klein-9b", | |
| "link": "https://bfl.ai/blog/flux2-klein-towards-interactive-visual-intelligence", | |
| "hf": "https://huggingface.co/black-forest-labs/FLUX.2-klein-9B", | |
| "open_source": true, | |
| "release_date": "2026-01", | |
| "Overall": 81.74, | |
| "Style": 99.09, | |
| "World Knowledge": 92.92, | |
| "Attribute-Overall": 94.03, | |
| "Quantity": 77.66, | |
| "Expression": 86.67, | |
| "Material": 96.54, | |
| "Size": 92.05, | |
| "Shape": 88.99, | |
| "Color": 98.74, | |
| "Action-Overall": 83.2, | |
| "Hand": 77.88, | |
| "Full body": 85.0, | |
| "Animal": 85.87, | |
| "Non Contact": 81.25, | |
| "Contact": 78.74, | |
| "State": 85.52, | |
| "Relationship-Overall": 90.73, | |
| "Composition": 90.56, | |
| "Similarity": 83.65, | |
| "Inclusion": 96.55, | |
| "Comparison": 91.5, | |
| "Compound-Overall": 91.69, | |
| "Imagination": 92.69, | |
| "Feature matching": 89.49, | |
| "Grammar-Overall": 86.55, | |
| "Pronoun Reference": 92.86, | |
| "Consistency": 88.89, | |
| "Negation": 78.87, | |
| "Layout-Overall": 93.33, | |
| "2D": 94.92, | |
| "3D": 91.3, | |
| "Logical Reasoning": 80.15, | |
| "Text": 5.71 | |
| }, | |
| { | |
| "model": "FLUX.2-klein-base-4b", | |
| "link": "https://bfl.ai/blog/flux2-klein-towards-interactive-visual-intelligence", | |
| "hf": "https://huggingface.co/black-forest-labs/FLUX.2-klein-base-4B", | |
| "open_source": true, | |
| "release_date": "2026-01", | |
| "Overall": 76.61, | |
| "Style": 97.84, | |
| "World Knowledge": 82.95, | |
| "Attribute-Overall": 90.64, | |
| "Quantity": 77.66, | |
| "Expression": 81.94, | |
| "Material": 92.47, | |
| "Size": 89.02, | |
| "Shape": 86.71, | |
| "Color": 95.55, | |
| "Action-Overall": 75.92, | |
| "Hand": 71.15, | |
| "Full body": 75.94, | |
| "Animal": 73.91, | |
| "Non Contact": 74.11, | |
| "Contact": 68.97, | |
| "State": 80.75, | |
| "Relationship-Overall": 83.62, | |
| "Composition": 82.14, | |
| "Similarity": 78.85, | |
| "Inclusion": 90.8, | |
| "Comparison": 83.98, | |
| "Compound-Overall": 83.82, | |
| "Imagination": 84.85, | |
| "Feature matching": 81.54, | |
| "Grammar-Overall": 86.04, | |
| "Pronoun Reference": 91.27, | |
| "Consistency": 85.71, | |
| "Negation": 81.69, | |
| "Layout-Overall": 88.33, | |
| "2D": 91.1, | |
| "3D": 84.78, | |
| "Logical Reasoning": 75.0, | |
| "Text": 1.9 | |
| }, | |
| { | |
| "model": "FLUX.2-klein-base-9b", | |
| "link": "https://bfl.ai/blog/flux2-klein-towards-interactive-visual-intelligence", | |
| "hf": "https://huggingface.co/black-forest-labs/FLUX.2-klein-base-9B", | |
| "open_source": true, | |
| "release_date": "2026-01", | |
| "Overall": 81.41, | |
| "Style": 97.67, | |
| "World Knowledge": 92.63, | |
| "Attribute-Overall": 94.09, | |
| "Quantity": 82.45, | |
| "Expression": 86.94, | |
| "Material": 95.86, | |
| "Size": 93.18, | |
| "Shape": 89.69, | |
| "Color": 98.27, | |
| "Action-Overall": 82.76, | |
| "Hand": 81.09, | |
| "Full body": 83.75, | |
| "Animal": 80.07, | |
| "Non Contact": 79.46, | |
| "Contact": 79.6, | |
| "State": 85.52, | |
| "Relationship-Overall": 90.95, | |
| "Composition": 90.94, | |
| "Similarity": 87.18, | |
| "Inclusion": 95.98, | |
| "Comparison": 89.56, | |
| "Compound-Overall": 90.6, | |
| "Imagination": 90.78, | |
| "Feature matching": 90.19, | |
| "Grammar-Overall": 86.42, | |
| "Pronoun Reference": 92.46, | |
| "Consistency": 86.11, | |
| "Negation": 81.34, | |
| "Layout-Overall": 92.38, | |
| "2D": 92.94, | |
| "3D": 91.67, | |
| "Logical Reasoning": 80.64, | |
| "Text": 5.98 | |
| }, | |
| { | |
| "model": "GPT-4o-1.5", | |
| "link": "https://developers.openai.com/api/docs/models/gpt-image-1.5", | |
| "hf": "-", | |
| "open_source": false, | |
| "release_date": "2025-12", | |
| "Overall": 96.12, | |
| "Style": 98.73, | |
| "World Knowledge": 99.27, | |
| "Attribute-Overall": 98.18, | |
| "Quantity": 95.11, | |
| "Expression": 95.93, | |
| "Material": 98.18, | |
| "Size": 97.14, | |
| "Shape": 98.25, | |
| "Color": 99.58, | |
| "Action-Overall": 94.31, | |
| "Hand": 96.05, | |
| "Full body": 96.23, | |
| "Animal": 98.55, | |
| "Non Contact": 94.14, | |
| "Contact": 95.4, | |
| "State": 91.67, | |
| "Relationship-Overall": 96.79, | |
| "Composition": 96.77, | |
| "Similarity": 94.52, | |
| "Inclusion": 99.42, | |
| "Comparison": 96.36, | |
| "Compound-Overall": 98.08, | |
| "Imagination": 98.72, | |
| "Feature matching": 96.63, | |
| "Grammar-Overall": 96.01, | |
| "Pronoun Reference": 97.18, | |
| "Consistency": 96.03, | |
| "Negation": 94.93, | |
| "Layout-Overall": 96.47, | |
| "2D": 97.14, | |
| "3D": 95.6, | |
| "Logical Reasoning": 94.36, | |
| "Text": 89.01 | |
| }, | |
| { | |
| "model": "LongCat-Image", | |
| "link": "https://arxiv.org/pdf/2512.07584", | |
| "hf": "https://huggingface.co/meituan-longcat/LongCat-Image", | |
| "open_source": true, | |
| "release_date": "2025-12", | |
| "Overall": 83.14, | |
| "Style": 90.2, | |
| "World Knowledge": 93.35, | |
| "Attribute-Overall": 90.96, | |
| "Quantity": 78.72, | |
| "Expression": 82.08, | |
| "Material": 91.79, | |
| "Size": 89.39, | |
| "Shape": 86.19, | |
| "Color": 96.8, | |
| "Action-Overall": 81.11, | |
| "Hand": 76.28, | |
| "Full body": 85.0, | |
| "Animal": 86.96, | |
| "Non Contact": 75.0, | |
| "Contact": 74.14, | |
| "State": 83.53, | |
| "Relationship-Overall": 82.6, | |
| "Composition": 81.25, | |
| "Similarity": 72.76, | |
| "Inclusion": 90.52, | |
| "Comparison": 85.92, | |
| "Compound-Overall": 81.27, | |
| "Imagination": 81.89, | |
| "Feature matching": 79.91, | |
| "Grammar-Overall": 77.79, | |
| "Pronoun Reference": 90.87, | |
| "Consistency": 75.0, | |
| "Negation": 68.66, | |
| "Layout-Overall": 86.35, | |
| "2D": 88.84, | |
| "3D": 83.15, | |
| "Logical Reasoning": 65.69, | |
| "Text": 82.07 | |
| }, | |
| { | |
| "model": "Seedream-4-5-251128", | |
| "link": "https://seed.bytedance.com/en/seedream4_5", | |
| "hf": "-", | |
| "open_source": false, | |
| "release_date": "2025-11", | |
| "Overall": 93.12, | |
| "Style": 99.0, | |
| "World Knowledge": 97.83, | |
| "Attribute-Overall": 96.49, | |
| "Quantity": 89.89, | |
| "Expression": 91.81, | |
| "Material": 97.06, | |
| "Size": 95.08, | |
| "Shape": 96.14, | |
| "Color": 99.0, | |
| "Action-Overall": 90.55, | |
| "Hand": 87.82, | |
| "Full body": 90.31, | |
| "Animal": 93.48, | |
| "Non Contact": 89.24, | |
| "Contact": 87.07, | |
| "State": 92.15, | |
| "Relationship-Overall": 92.29, | |
| "Composition": 90.18, | |
| "Similarity": 88.1, | |
| "Inclusion": 97.13, | |
| "Comparison": 95.38, | |
| "Compound-Overall": 90.88, | |
| "Imagination": 90.67, | |
| "Feature matching": 91.36, | |
| "Grammar-Overall": 89.96, | |
| "Pronoun Reference": 98.81, | |
| "Consistency": 90.84, | |
| "Negation": 81.34, | |
| "Layout-Overall": 94.2, | |
| "2D": 95.76, | |
| "3D": 92.2, | |
| "Logical Reasoning": 86.76, | |
| "Text": 93.21 | |
| }, | |
| { | |
| "model": "Z-Image", | |
| "link": "https://arxiv.org/pdf/2511.22699", | |
| "hf": "https://huggingface.co/Tongyi-MAI/Z-Image", | |
| "open_source": true, | |
| "release_date": "2026-01", | |
| "Overall": 89.17, | |
| "Style": 97.67, | |
| "World Knowledge": 95.52, | |
| "Attribute-Overall": 94.32, | |
| "Quantity": 86.7, | |
| "Expression": 86.39, | |
| "Material": 97.21, | |
| "Size": 92.42, | |
| "Shape": 89.51, | |
| "Color": 98.01, | |
| "Action-Overall": 86.13, | |
| "Hand": 86.22, | |
| "Full body": 88.44, | |
| "Animal": 84.42, | |
| "Non Contact": 83.93, | |
| "Contact": 81.32, | |
| "State": 88.0, | |
| "Relationship-Overall": 89.12, | |
| "Composition": 88.9, | |
| "Similarity": 83.01, | |
| "Inclusion": 92.53, | |
| "Comparison": 91.26, | |
| "Compound-Overall": 86.15, | |
| "Imagination": 86.02, | |
| "Feature matching": 86.45, | |
| "Grammar-Overall": 81.73, | |
| "Pronoun Reference": 93.65, | |
| "Consistency": 80.16, | |
| "Negation": 72.54, | |
| "Layout-Overall": 92.62, | |
| "2D": 93.79, | |
| "3D": 91.12, | |
| "Logical Reasoning": 79.9, | |
| "Text": 88.59 | |
| }, | |
| { | |
| "model": "FLUX.2-dev", | |
| "link": "https://github.com/black-forest-labs/flux2", | |
| "hf": "https://huggingface.co/black-forest-labs/FLUX.2-dev", | |
| "open_source": true, | |
| "release_date": "2025-11", | |
| "Overall": 86.12, | |
| "Style": 98.42, | |
| "World Knowledge": 95.52, | |
| "Attribute-Overall": 95.29, | |
| "Quantity": 84.57, | |
| "Expression": 89.44, | |
| "Material": 97.59, | |
| "Size": 92.8, | |
| "Shape": 93.01, | |
| "Color": 98.32, | |
| "Action-Overall": 88.46, | |
| "Hand": 85.58, | |
| "Full body": 87.81, | |
| "Animal": 91.67, | |
| "Non Contact": 87.95, | |
| "Contact": 88.51, | |
| "State": 88.79, | |
| "Relationship-Overall": 92.4, | |
| "Composition": 92.73, | |
| "Similarity": 88.46, | |
| "Inclusion": 95.4, | |
| "Comparison": 92.23, | |
| "Compound-Overall": 89.5, | |
| "Imagination": 90.25, | |
| "Feature matching": 87.85, | |
| "Grammar-Overall": 84.26, | |
| "Pronoun Reference": 97.62, | |
| "Consistency": 82.94, | |
| "Negation": 73.59, | |
| "Layout-Overall": 94.44, | |
| "2D": 93.93, | |
| "3D": 95.11, | |
| "Logical Reasoning": 79.66, | |
| "Text": 43.21 | |
| }, | |
| { | |
| "model": "Nano Banana Pro", | |
| "link": "https://nano-banana.pro", | |
| "hf": "-", | |
| "open_source": false, | |
| "release_date": "2025-11", | |
| "Overall": 95.42, | |
| "Style": 99.42, | |
| "World Knowledge": 98.84, | |
| "Attribute-Overall": 97.14, | |
| "Quantity": 89.36, | |
| "Expression": 94.31, | |
| "Material": 98.27, | |
| "Size": 96.78, | |
| "Shape": 94.23, | |
| "Color": 99.16, | |
| "Action-Overall": 92.97, | |
| "Hand": 93.59, | |
| "Full body": 92.81, | |
| "Animal": 97.46, | |
| "Non Contact": 91.52, | |
| "Contact": 91.95, | |
| "State": 92.26, | |
| "Relationship-Overall": 95.64, | |
| "Composition": 95.79, | |
| "Similarity": 93.91, | |
| "Inclusion": 97.99, | |
| "Comparison": 94.66, | |
| "Compound-Overall": 95.85, | |
| "Imagination": 95.87, | |
| "Feature matching": 95.79, | |
| "Grammar-Overall": 93.27, | |
| "Pronoun Reference": 99.21, | |
| "Consistency": 90.87, | |
| "Negation": 90.14, | |
| "Layout-Overall": 96.27, | |
| "2D": 96.47, | |
| "3D": 96.01, | |
| "Logical Reasoning": 91.91, | |
| "Text": 92.93 | |
| }, | |
| { | |
| "model": "Z-Image-Turbo", | |
| "link": "https://github.com/Tongyi-MAI/Z-Image", | |
| "hf": "https://huggingface.co/Tongyi-MAI/Z-Image-Turbo", | |
| "open_source": true, | |
| "release_date": "2025-11", | |
| "Overall": 83.69, | |
| "Style": 96.26, | |
| "World Knowledge": 94.8, | |
| "Attribute-Overall": 90.96, | |
| "Quantity": 77.66, | |
| "Expression": 79.31, | |
| "Material": 93.37, | |
| "Size": 87.88, | |
| "Shape": 85.49, | |
| "Color": 97.48, | |
| "Action-Overall": 78.74, | |
| "Hand": 75.64, | |
| "Full body": 78.75, | |
| "Animal": 77.9, | |
| "Non Contact": 75.45, | |
| "Contact": 73.85, | |
| "State": 82.34, | |
| "Relationship-Overall": 84.38, | |
| "Composition": 84.69, | |
| "Similarity": 75.0, | |
| "Inclusion": 87.93, | |
| "Comparison": 87.86, | |
| "Compound-Overall": 80.03, | |
| "Imagination": 79.87, | |
| "Feature matching": 80.37, | |
| "Grammar-Overall": 78.3, | |
| "Pronoun Reference": 89.68, | |
| "Consistency": 77.78, | |
| "Negation": 68.66, | |
| "Layout-Overall": 87.94, | |
| "2D": 90.4, | |
| "3D": 84.78, | |
| "Logical Reasoning": 70.83, | |
| "Text": 74.73 | |
| }, | |
| { | |
| "model": "wan2.5-t2i-preview", | |
| "link": "https://help.aliyun.com/zh/model-studio/text-to-image-v2-api-reference", | |
| "hf": "-", | |
| "open_source": false, | |
| "release_date": "2025-09", | |
| "Overall": 84.36, | |
| "Style": 97.42, | |
| "World Knowledge": 94.15, | |
| "Attribute-Overall": 91.04, | |
| "Quantity": 82.98, | |
| "Expression": 82.72, | |
| "Material": 92.22, | |
| "Size": 91.79, | |
| "Shape": 87.59, | |
| "Color": 94.96, | |
| "Action-Overall": 77.75, | |
| "Hand": 71.15, | |
| "Full body": 75.32, | |
| "Animal": 83.46, | |
| "Non Contact": 75.91, | |
| "Contact": 73.28, | |
| "State": 80.98, | |
| "Relationship-Overall": 87.23, | |
| "Composition": 85.97, | |
| "Similarity": 79.17, | |
| "Inclusion": 91.95, | |
| "Comparison": 91.75, | |
| "Compound-Overall": 85.53, | |
| "Imagination": 86.65, | |
| "Feature matching": 83.02, | |
| "Grammar-Overall": 81.09, | |
| "Pronoun Reference": 92.46, | |
| "Consistency": 82.54, | |
| "Negation": 69.72, | |
| "Layout-Overall": 89.01, | |
| "2D": 89.63, | |
| "3D": 88.22, | |
| "Logical Reasoning": 73.28, | |
| "Text": 67.12 | |
| }, | |
| { | |
| "model": "Emu3", | |
| "link": "https://arxiv.org/pdf/2409.18869", | |
| "hf": "https://huggingface.co/BAAI/Emu3-Gen", | |
| "open_source": true, | |
| "release_date": "2024-09", | |
| "Overall": 35.95, | |
| "Style": 75.08, | |
| "World Knowledge": 53.03, | |
| "Attribute-Overall": 48.82, | |
| "Quantity": 23.4, | |
| "Expression": 38.33, | |
| "Material": 49.17, | |
| "Size": 57.77, | |
| "Shape": 36.19, | |
| "Color": 56.34, | |
| "Action-Overall": 27.81, | |
| "Hand": 10.58, | |
| "Full body": 22.81, | |
| "Animal": 25.36, | |
| "Non Contact": 12.05, | |
| "Contact": 17.53, | |
| "State": 42.39, | |
| "Relationship-Overall": 32.06, | |
| "Composition": 33.29, | |
| "Similarity": 29.17, | |
| "Inclusion": 35.06, | |
| "Comparison": 29.37, | |
| "Compound-Overall": 28.49, | |
| "Imagination": 33.02, | |
| "Feature matching": 18.46, | |
| "Grammar-Overall": 38.32, | |
| "Pronoun Reference": 42.86, | |
| "Consistency": 26.59, | |
| "Negation": 44.72, | |
| "Layout-Overall": 35.4, | |
| "2D": 30.37, | |
| "3D": 41.85, | |
| "Logical Reasoning": 19.66, | |
| "Text": 0.82 | |
| }, | |
| { | |
| "model": "UniWorld-V1", | |
| "link": "https://arxiv.org/pdf/2506.03147", | |
| "hf": "https://huggingface.co/LanguageBind/UniWorld-V1", | |
| "open_source": true, | |
| "release_date": "2025-06", | |
| "Overall": 21.5, | |
| "Style": 55.48, | |
| "World Knowledge": 17.34, | |
| "Attribute-Overall": 27.5, | |
| "Quantity": 12.23, | |
| "Expression": 30.28, | |
| "Material": 19.8, | |
| "Size": 27.27, | |
| "Shape": 19.76, | |
| "Color": 35.69, | |
| "Action-Overall": 19.34, | |
| "Hand": 12.18, | |
| "Full body": 20.31, | |
| "Animal": 23.19, | |
| "Non Contact": 9.38, | |
| "Contact": 8.05, | |
| "State": 26.28, | |
| "Relationship-Overall": 19.34, | |
| "Composition": 16.2, | |
| "Similarity": 21.47, | |
| "Inclusion": 23.56, | |
| "Comparison": 20.15, | |
| "Compound-Overall": 12.5, | |
| "Imagination": 15.3, | |
| "Feature matching": 6.31, | |
| "Grammar-Overall": 28.68, | |
| "Pronoun Reference": 23.81, | |
| "Consistency": 21.03, | |
| "Negation": 39.79, | |
| "Layout-Overall": 24.44, | |
| "2D": 24.15, | |
| "3D": 24.82, | |
| "Logical Reasoning": 8.98, | |
| "Text": 1.36 | |
| }, | |
| { | |
| "model": "Echo-4o", | |
| "link": "https://arxiv.org/pdf/2508.09987", | |
| "hf": "https://huggingface.co/Yejy53/Echo-4o", | |
| "open_source": true, | |
| "release_date": "2025-08", | |
| "Overall": 78.31, | |
| "Style": 96.26, | |
| "World Knowledge": 91.18, | |
| "Attribute-Overall": 91.82, | |
| "Quantity": 71.81, | |
| "Expression": 82.22, | |
| "Material": 94.5, | |
| "Size": 90.72, | |
| "Shape": 88.64, | |
| "Color": 96.8, | |
| "Action-Overall": 75.56, | |
| "Hand": 73.72, | |
| "Full body": 81.56, | |
| "Animal": 74.28, | |
| "Non Contact": 67.41, | |
| "Contact": 66.38, | |
| "State": 79.55, | |
| "Relationship-Overall": 85.83, | |
| "Composition": 86.99, | |
| "Similarity": 81.09, | |
| "Inclusion": 89.08, | |
| "Comparison": 84.47, | |
| "Compound-Overall": 85.25, | |
| "Imagination": 86.08, | |
| "Feature matching": 83.41, | |
| "Grammar-Overall": 83.5, | |
| "Pronoun Reference": 87.7, | |
| "Consistency": 83.73, | |
| "Negation": 79.58, | |
| "Layout-Overall": 88.1, | |
| "2D": 90.54, | |
| "3D": 84.96, | |
| "Logical Reasoning": 72.57, | |
| "Text": 13.04 | |
| }, | |
| { | |
| "model": "Lumina-DiMOO", | |
| "link": "https://synbol.github.io/Lumina-DiMOO/", | |
| "hf": "https://huggingface.co/Alpha-VLLM/Lumina-DiMOO", | |
| "open_source": true, | |
| "release_date": "2025-09", | |
| "Overall": 63.8, | |
| "Style": 84.3, | |
| "World Knowledge": 76.45, | |
| "Attribute-Overall": 79.41, | |
| "Quantity": 64.36, | |
| "Expression": 68.06, | |
| "Material": 77.18, | |
| "Size": 82.01, | |
| "Shape": 72.73, | |
| "Color": 88.0, | |
| "Action-Overall": 61.32, | |
| "Hand": 54.81, | |
| "Full body": 57.5, | |
| "Animal": 61.96, | |
| "Non Contact": 60.27, | |
| "Contact": 49.43, | |
| "State": 68.68, | |
| "Relationship-Overall": 66.7, | |
| "Composition": 62.24, | |
| "Similarity": 61.22, | |
| "Inclusion": 78.74, | |
| "Comparison": 69.17, | |
| "Compound-Overall": 68.9, | |
| "Imagination": 72.57, | |
| "Feature matching": 60.75, | |
| "Grammar-Overall": 71.95, | |
| "Pronoun Reference": 76.98, | |
| "Consistency": 67.06, | |
| "Negation": 71.83, | |
| "Layout-Overall": 78.33, | |
| "2D": 84.18, | |
| "3D": 70.83, | |
| "Logical Reasoning": 49.27, | |
| "Text": 1.36 | |
| }, | |
| { | |
| "model": "MMaDA", | |
| "link": "https://arxiv.org/pdf/2505.15809", | |
| "hf": "https://huggingface.co/Gen-Verse/MMaDA-8B-MixCoT", | |
| "open_source": true, | |
| "release_date": "2025-05", | |
| "Overall": 50.61, | |
| "Style": 84.05, | |
| "World Knowledge": 63.58, | |
| "Attribute-Overall": 61.31, | |
| "Quantity": 46.81, | |
| "Expression": 40.0, | |
| "Material": 58.96, | |
| "Size": 67.8, | |
| "Shape": 52.62, | |
| "Color": 73.22, | |
| "Action-Overall": 42.98, | |
| "Hand": 23.4, | |
| "Full body": 39.06, | |
| "Animal": 40.58, | |
| "Non Contact": 29.02, | |
| "Contact": 30.75, | |
| "State": 58.2, | |
| "Relationship-Overall": 52.69, | |
| "Composition": 48.09, | |
| "Similarity": 49.04, | |
| "Inclusion": 60.63, | |
| "Comparison": 57.52, | |
| "Compound-Overall": 50.07, | |
| "Imagination": 56.65, | |
| "Feature matching": 35.51, | |
| "Grammar-Overall": 58.76, | |
| "Pronoun Reference": 61.11, | |
| "Consistency": 50.79, | |
| "Negation": 63.73, | |
| "Layout-Overall": 60.63, | |
| "2D": 65.54, | |
| "3D": 54.35, | |
| "Logical Reasoning": 31.8, | |
| "Text": 0.27 | |
| }, | |
| { | |
| "model": "OmniGen2", | |
| "link": "https://arxiv.org/pdf/2506.18871", | |
| "hf": "https://huggingface.co/OmniGen2/OmniGen2", | |
| "open_source": true, | |
| "release_date": "2025-06", | |
| "Overall": 70.75, | |
| "Style": 95.35, | |
| "World Knowledge": 87.57, | |
| "Attribute-Overall": 85.05, | |
| "Quantity": 74.47, | |
| "Expression": 73.33, | |
| "Material": 84.94, | |
| "Size": 85.23, | |
| "Shape": 79.9, | |
| "Color": 92.09, | |
| "Action-Overall": 67.17, | |
| "Hand": 63.46, | |
| "Full body": 67.81, | |
| "Animal": 63.41, | |
| "Non Contact": 63.39, | |
| "Contact": 60.34, | |
| "State": 72.33, | |
| "Relationship-Overall": 75.38, | |
| "Composition": 70.79, | |
| "Similarity": 70.51, | |
| "Inclusion": 87.64, | |
| "Comparison": 77.43, | |
| "Compound-Overall": 74.06, | |
| "Imagination": 76.05, | |
| "Feature matching": 69.63, | |
| "Grammar-Overall": 77.03, | |
| "Pronoun Reference": 85.71, | |
| "Consistency": 76.59, | |
| "Negation": 69.72, | |
| "Layout-Overall": 81.35, | |
| "2D": 84.89, | |
| "3D": 76.81, | |
| "Logical Reasoning": 62.62, | |
| "Text": 1.9 | |
| }, | |
| { | |
| "model": "OneCAT", | |
| "link": "https://arxiv.org/pdf/2509.03498", | |
| "hf": "https://huggingface.co/onecat-ai/OneCAT-3B", | |
| "open_source": true, | |
| "release_date": "2025-09", | |
| "Overall": 61.4, | |
| "Style": 96.01, | |
| "World Knowledge": 80.35, | |
| "Attribute-Overall": 72.01, | |
| "Quantity": 60.11, | |
| "Expression": 63.75, | |
| "Material": 73.87, | |
| "Size": 79.17, | |
| "Shape": 58.57, | |
| "Color": 77.04, | |
| "Action-Overall": 56.9, | |
| "Hand": 32.69, | |
| "Full body": 64.06, | |
| "Animal": 58.33, | |
| "Non Contact": 46.43, | |
| "Contact": 40.52, | |
| "State": 69.66, | |
| "Relationship-Overall": 61.85, | |
| "Composition": 63.65, | |
| "Similarity": 58.01, | |
| "Inclusion": 60.06, | |
| "Comparison": 62.86, | |
| "Compound-Overall": 58.5, | |
| "Imagination": 68.99, | |
| "Feature matching": 35.28, | |
| "Grammar-Overall": 63.2, | |
| "Pronoun Reference": 69.05, | |
| "Consistency": 63.89, | |
| "Negation": 57.39, | |
| "Layout-Overall": 73.49, | |
| "2D": 76.27, | |
| "3D": 69.93, | |
| "Logical Reasoning": 49.76, | |
| "Text": 1.9 | |
| }, | |
| { | |
| "model": "X-Omni", | |
| "link": "https://arxiv.org/pdf/2507.22058", | |
| "hf": "https://huggingface.co/X-Omni/X-Omni-Zh", | |
| "open_source": true, | |
| "release_date": "2025-08", | |
| "Overall": 62.18, | |
| "Style": 76.91, | |
| "World Knowledge": 74.13, | |
| "Attribute-Overall": 76.51, | |
| "Quantity": 72.34, | |
| "Expression": 59.72, | |
| "Material": 77.79, | |
| "Size": 82.2, | |
| "Shape": 67.83, | |
| "Color": 83.39, | |
| "Action-Overall": 58.43, | |
| "Hand": 50.0, | |
| "Full body": 61.56, | |
| "Animal": 61.96, | |
| "Non Contact": 49.55, | |
| "Contact": 42.82, | |
| "State": 66.4, | |
| "Relationship-Overall": 60.83, | |
| "Composition": 57.02, | |
| "Similarity": 55.45, | |
| "Inclusion": 65.52, | |
| "Comparison": 68.2, | |
| "Compound-Overall": 61.12, | |
| "Imagination": 65.51, | |
| "Feature matching": 51.4, | |
| "Grammar-Overall": 64.85, | |
| "Pronoun Reference": 76.19, | |
| "Consistency": 58.33, | |
| "Negation": 60.56, | |
| "Layout-Overall": 73.02, | |
| "2D": 76.84, | |
| "3D": 68.12, | |
| "Logical Reasoning": 46.6, | |
| "Text": 29.35 | |
| }, | |
| { | |
| "model": "Bagel", | |
| "link": "https://arxiv.org/pdf/2505.14683", | |
| "hf": "https://huggingface.co/ByteDance-Seed/BAGEL-7B-MoT", | |
| "open_source": true, | |
| "release_date": "2025-05", | |
| "Overall": 75.75, | |
| "Style": 96.1, | |
| "World Knowledge": 89.02, | |
| "Attribute-Overall": 88.25, | |
| "Quantity": 71.81, | |
| "Expression": 73.47, | |
| "Material": 88.93, | |
| "Size": 90.53, | |
| "Shape": 83.39, | |
| "Color": 95.81, | |
| "Action-Overall": 72.43, | |
| "Hand": 71.47, | |
| "Full body": 75.62, | |
| "Animal": 76.09, | |
| "Non Contact": 66.96, | |
| "Contact": 63.22, | |
| "State": 75.1, | |
| "Relationship-Overall": 81.52, | |
| "Composition": 80.87, | |
| "Similarity": 76.6, | |
| "Inclusion": 86.78, | |
| "Comparison": 82.04, | |
| "Compound-Overall": 82.05, | |
| "Imagination": 83.97, | |
| "Feature matching": 77.8, | |
| "Grammar-Overall": 81.09, | |
| "Pronoun Reference": 84.92, | |
| "Consistency": 83.33, | |
| "Negation": 75.7, | |
| "Layout-Overall": 83.97, | |
| "2D": 87.29, | |
| "3D": 79.71, | |
| "Logical Reasoning": 68.69, | |
| "Text": 14.4 | |
| }, | |
| { | |
| "model": "HiDream-I1-Full", | |
| "link": "https://arxiv.org/pdf/2505.22705", | |
| "hf": "https://huggingface.co/HiDream-ai/HiDream-I1-Full", | |
| "open_source": true, | |
| "release_date": "2025-05", | |
| "Overall": 50.7, | |
| "Style": 83.06, | |
| "World Knowledge": 78.61, | |
| "Attribute-Overall": 65.05, | |
| "Quantity": 63.3, | |
| "Expression": 55.97, | |
| "Material": 62.5, | |
| "Size": 69.7, | |
| "Shape": 56.12, | |
| "Color": 71.8, | |
| "Action-Overall": 47.47, | |
| "Hand": 38.14, | |
| "Full body": 45.0, | |
| "Animal": 44.93, | |
| "Non Contact": 38.39, | |
| "Contact": 36.21, | |
| "State": 57.71, | |
| "Relationship-Overall": 49.25, | |
| "Composition": 46.3, | |
| "Similarity": 45.83, | |
| "Inclusion": 59.2, | |
| "Comparison": 49.03, | |
| "Compound-Overall": 42.08, | |
| "Imagination": 45.99, | |
| "Feature matching": 33.41, | |
| "Grammar-Overall": 53.81, | |
| "Pronoun Reference": 59.52, | |
| "Consistency": 49.6, | |
| "Negation": 52.46, | |
| "Layout-Overall": 60.4, | |
| "2D": 62.99, | |
| "3D": 57.07, | |
| "Logical Reasoning": 24.27, | |
| "Text": 2.99 | |
| }, | |
| { | |
| "model": "Hunyuan-Image-2.1", | |
| "link": "https://github.com/Tencent-Hunyuan/HunyuanImage-2.1", | |
| "hf": "https://huggingface.co/spaces/tencent/HunyuanImage-2.1", | |
| "open_source": true, | |
| "release_date": "2025-09", | |
| "Overall": 87.01, | |
| "Style": 95.18, | |
| "World Knowledge": 94.08, | |
| "Attribute-Overall": 93.82, | |
| "Quantity": 87.77, | |
| "Expression": 87.08, | |
| "Material": 95.41, | |
| "Size": 91.67, | |
| "Shape": 89.69, | |
| "Color": 97.69, | |
| "Action-Overall": 83.99, | |
| "Hand": 85.58, | |
| "Full body": 84.69, | |
| "Animal": 85.51, | |
| "Non Contact": 83.48, | |
| "Contact": 79.02, | |
| "State": 84.68, | |
| "Relationship-Overall": 88.09, | |
| "Composition": 87.88, | |
| "Similarity": 81.41, | |
| "Inclusion": 92.24, | |
| "Comparison": 90.05, | |
| "Compound-Overall": 85.61, | |
| "Imagination": 85.97, | |
| "Feature matching": 84.81, | |
| "Grammar-Overall": 80.08, | |
| "Pronoun Reference": 92.86, | |
| "Consistency": 83.33, | |
| "Negation": 65.85, | |
| "Layout-Overall": 91.43, | |
| "2D": 93.5, | |
| "3D": 88.77, | |
| "Logical Reasoning": 71.36, | |
| "Text": 86.41 | |
| }, | |
| { | |
| "model": "BLIP3-o", | |
| "link": "https://arxiv.org/pdf/2505.09568", | |
| "hf": "https://huggingface.co/BLIP3o/BLIP3o-NEXT-SFT-3B", | |
| "open_source": true, | |
| "release_date": "2025-08", | |
| "Overall": 59.25, | |
| "Style": 89.7, | |
| "World Knowledge": 77.17, | |
| "Attribute-Overall": 69.24, | |
| "Quantity": 53.19, | |
| "Expression": 59.03, | |
| "Material": 71.31, | |
| "Size": 79.36, | |
| "Shape": 54.02, | |
| "Color": 75.0, | |
| "Action-Overall": 55.98, | |
| "Hand": 42.63, | |
| "Full body": 59.38, | |
| "Animal": 60.87, | |
| "Non Contact": 45.98, | |
| "Contact": 43.97, | |
| "State": 64.03, | |
| "Relationship-Overall": 60.56, | |
| "Composition": 58.29, | |
| "Similarity": 54.81, | |
| "Inclusion": 60.63, | |
| "Comparison": 69.17, | |
| "Compound-Overall": 60.68, | |
| "Imagination": 67.72, | |
| "Feature matching": 45.09, | |
| "Grammar-Overall": 60.91, | |
| "Pronoun Reference": 72.22, | |
| "Consistency": 53.17, | |
| "Negation": 57.75, | |
| "Layout-Overall": 69.29, | |
| "2D": 72.6, | |
| "3D": 65.04, | |
| "Logical Reasoning": 47.09, | |
| "Text": 1.9 | |
| }, | |
| { | |
| "model": "BLIP3-o-Next", | |
| "link": "https://arxiv.org/pdf/2505.09568", | |
| "hf": "https://huggingface.co/BLIP3o/BLIP3o-NEXT-SFT-3B", | |
| "open_source": true, | |
| "release_date": "2025-08", | |
| "Overall": 54.55, | |
| "Style": 87.71, | |
| "World Knowledge": 61.85, | |
| "Attribute-Overall": 63.75, | |
| "Quantity": 50.0, | |
| "Expression": 64.58, | |
| "Material": 67.85, | |
| "Size": 67.61, | |
| "Shape": 55.94, | |
| "Color": 63.21, | |
| "Action-Overall": 51.81, | |
| "Hand": 37.5, | |
| "Full body": 56.25, | |
| "Animal": 50.72, | |
| "Non Contact": 45.98, | |
| "Contact": 37.36, | |
| "State": 61.36, | |
| "Relationship-Overall": 57.76, | |
| "Composition": 55.36, | |
| "Similarity": 53.53, | |
| "Inclusion": 60.34, | |
| "Comparison": 63.35, | |
| "Compound-Overall": 54.0, | |
| "Imagination": 59.49, | |
| "Feature matching": 41.82, | |
| "Grammar-Overall": 60.66, | |
| "Pronoun Reference": 65.48, | |
| "Consistency": 58.73, | |
| "Negation": 58.1, | |
| "Layout-Overall": 64.6, | |
| "2D": 67.8, | |
| "3D": 60.51, | |
| "Logical Reasoning": 41.5, | |
| "Text": 1.9 | |
| }, | |
| { | |
| "model": "Janus-flow", | |
| "link": "https://arxiv.org/pdf/2411.07975", | |
| "hf": "https://huggingface.co/deepseek-ai/JanusFlow-1.3B", | |
| "open_source": true, | |
| "release_date": "2024-11", | |
| "Overall": 23.09, | |
| "Style": 57.39, | |
| "World Knowledge": 17.49, | |
| "Attribute-Overall": 23.42, | |
| "Quantity": 11.7, | |
| "Expression": 11.39, | |
| "Material": 23.72, | |
| "Size": 32.2, | |
| "Shape": 15.91, | |
| "Color": 28.72, | |
| "Action-Overall": 19.46, | |
| "Hand": 3.85, | |
| "Full body": 18.75, | |
| "Animal": 19.2, | |
| "Non Contact": 9.38, | |
| "Contact": 9.48, | |
| "State": 30.24, | |
| "Relationship-Overall": 20.04, | |
| "Composition": 18.62, | |
| "Similarity": 18.91, | |
| "Inclusion": 24.43, | |
| "Comparison": 19.9, | |
| "Compound-Overall": 21.58, | |
| "Imagination": 28.8, | |
| "Feature matching": 5.61, | |
| "Grammar-Overall": 32.23, | |
| "Pronoun Reference": 29.76, | |
| "Consistency": 13.89, | |
| "Negation": 50.7, | |
| "Layout-Overall": 21.59, | |
| "2D": 18.64, | |
| "3D": 25.36, | |
| "Logical Reasoning": 17.48, | |
| "Text": 0.27 | |
| }, | |
| { | |
| "model": "Janus", | |
| "link": "https://arxiv.org/pdf/2410.13848", | |
| "hf": "https://huggingface.co/deepseek-ai/Janus-1.3B", | |
| "open_source": true, | |
| "release_date": "2024-10", | |
| "Overall": 33.63, | |
| "Style": 75.0, | |
| "World Knowledge": 30.06, | |
| "Attribute-Overall": 35.98, | |
| "Quantity": 25.53, | |
| "Expression": 25.97, | |
| "Material": 39.16, | |
| "Size": 45.83, | |
| "Shape": 22.2, | |
| "Color": 39.99, | |
| "Action-Overall": 29.74, | |
| "Hand": 11.54, | |
| "Full body": 35.31, | |
| "Animal": 32.25, | |
| "Non Contact": 16.96, | |
| "Contact": 14.08, | |
| "State": 41.11, | |
| "Relationship-Overall": 28.23, | |
| "Composition": 26.02, | |
| "Similarity": 26.6, | |
| "Inclusion": 30.46, | |
| "Comparison": 31.8, | |
| "Compound-Overall": 31.47, | |
| "Imagination": 38.92, | |
| "Feature matching": 14.95, | |
| "Grammar-Overall": 44.04, | |
| "Pronoun Reference": 46.43, | |
| "Consistency": 24.6, | |
| "Negation": 59.15, | |
| "Layout-Overall": 40.56, | |
| "2D": 38.98, | |
| "3D": 42.57, | |
| "Logical Reasoning": 20.15, | |
| "Text": 1.09 | |
| }, | |
| { | |
| "model": "CogView4", | |
| "link": "https://arxiv.org/pdf/2403.05121", | |
| "hf": "https://huggingface.co/zai-org/CogView4-6B", | |
| "open_source": true, | |
| "release_date": "2024-03", | |
| "Overall": 68.09, | |
| "Style": 89.62, | |
| "World Knowledge": 89.31, | |
| "Attribute-Overall": 80.99, | |
| "Quantity": 73.4, | |
| "Expression": 65.69, | |
| "Material": 80.35, | |
| "Size": 85.98, | |
| "Shape": 73.43, | |
| "Color": 88.84, | |
| "Action-Overall": 67.94, | |
| "Hand": 67.31, | |
| "Full body": 68.75, | |
| "Animal": 71.01, | |
| "Non Contact": 58.04, | |
| "Contact": 63.79, | |
| "State": 70.65, | |
| "Relationship-Overall": 70.58, | |
| "Composition": 66.07, | |
| "Similarity": 64.1, | |
| "Inclusion": 80.17, | |
| "Comparison": 75.97, | |
| "Compound-Overall": 69.91, | |
| "Imagination": 71.94, | |
| "Feature matching": 65.42, | |
| "Grammar-Overall": 70.94, | |
| "Pronoun Reference": 83.33, | |
| "Consistency": 69.05, | |
| "Negation": 61.62, | |
| "Layout-Overall": 81.51, | |
| "2D": 84.46, | |
| "3D": 77.72, | |
| "Logical Reasoning": 51.94, | |
| "Text": 8.15 | |
| }, | |
| { | |
| "model": "Kolors", | |
| "link": "https://github.com/Kwai-Kolors/Kolors/blob/master/imgs/Kolors_paper.pdf", | |
| "hf": "https://huggingface.co/Kwai-Kolors/Kolors", | |
| "open_source": true, | |
| "release_date": "2024-7", | |
| "Overall": 65.12, | |
| "Style": 90.61, | |
| "World Knowledge": 87.14, | |
| "Attribute-Overall": 81.18, | |
| "Quantity": 63.83, | |
| "Expression": 64.86, | |
| "Material": 82.98, | |
| "Size": 83.52, | |
| "Shape": 70.8, | |
| "Color": 90.25, | |
| "Action-Overall": 64.49, | |
| "Hand": 58.97, | |
| "Full body": 57.19, | |
| "Animal": 63.41, | |
| "Non Contact": 65.18, | |
| "Contact": 50.57, | |
| "State": 73.42, | |
| "Relationship-Overall": 71.23, | |
| "Composition": 69.9, | |
| "Similarity": 74.68, | |
| "Inclusion": 74.43, | |
| "Comparison": 68.45, | |
| "Compound-Overall": 64.17, | |
| "Imagination": 67.83, | |
| "Feature matching": 56.07, | |
| "Grammar-Overall": 63.96, | |
| "Pronoun Reference": 81.35, | |
| "Consistency": 62.3, | |
| "Negation": 50.0, | |
| "Layout-Overall": 74.6, | |
| "2D": 72.46, | |
| "3D": 77.36, | |
| "Logical Reasoning": 47.82, | |
| "Text": 5.98 | |
| }, | |
| { | |
| "model": "Janus-Pro", | |
| "link": "https://arxiv.org/pdf/2501.17811", | |
| "hf": "https://huggingface.co/deepseek-ai/Janus-Pro-7B", | |
| "open_source": true, | |
| "release_date": "2025-01", | |
| "Overall": 60.21, | |
| "Style": 91.28, | |
| "World Knowledge": 75.87, | |
| "Attribute-Overall": 65.79, | |
| "Quantity": 44.15, | |
| "Expression": 52.92, | |
| "Material": 69.8, | |
| "Size": 78.22, | |
| "Shape": 56.99, | |
| "Color": 69.18, | |
| "Action-Overall": 54.33, | |
| "Hand": 37.82, | |
| "Full body": 51.25, | |
| "Animal": 63.04, | |
| "Non Contact": 48.21, | |
| "Contact": 51.72, | |
| "State": 60.28, | |
| "Relationship-Overall": 62.61, | |
| "Composition": 62.5, | |
| "Similarity": 57.05, | |
| "Inclusion": 66.38, | |
| "Comparison": 63.83, | |
| "Compound-Overall": 65.62, | |
| "Imagination": 72.47, | |
| "Feature matching": 50.47, | |
| "Grammar-Overall": 68.53, | |
| "Pronoun Reference": 72.22, | |
| "Consistency": 61.11, | |
| "Negation": 71.83, | |
| "Layout-Overall": 66.59, | |
| "2D": 66.38, | |
| "3D": 66.85, | |
| "Logical Reasoning": 49.27, | |
| "Text": 2.17 | |
| }, | |
| { | |
| "model": "Seedream-4.0", | |
| "link": "https://www.volcengine.com/docs/82379/1541523", | |
| "hf": "-", | |
| "open_source": false, | |
| "release_date": "2025-09", | |
| "Overall": 90.35, | |
| "Style": 98.42, | |
| "World Knowledge": 96.39, | |
| "Attribute-Overall": 95.54, | |
| "Quantity": 86.7, | |
| "Expression": 90.69, | |
| "Material": 96.08, | |
| "Size": 95.45, | |
| "Shape": 93.71, | |
| "Color": 98.43, | |
| "Action-Overall": 89.29, | |
| "Hand": 84.94, | |
| "Full body": 91.56, | |
| "Animal": 92.03, | |
| "Non Contact": 92.41, | |
| "Contact": 86.21, | |
| "State": 89.53, | |
| "Relationship-Overall": 88.69, | |
| "Composition": 86.35, | |
| "Similarity": 83.01, | |
| "Inclusion": 93.39, | |
| "Comparison": 93.45, | |
| "Compound-Overall": 87.72, | |
| "Imagination": 87.66, | |
| "Feature matching": 87.85, | |
| "Grammar-Overall": 83.63, | |
| "Pronoun Reference": 94.44, | |
| "Consistency": 82.14, | |
| "Negation": 75.35, | |
| "Layout-Overall": 91.9, | |
| "2D": 92.66, | |
| "3D": 90.94, | |
| "Logical Reasoning": 80.58, | |
| "Text": 91.3 | |
| }, | |
| { | |
| "model": "DALL-E-3", | |
| "link": "https://openai.com/zh-Hans-CN/index/dall-e-3/", | |
| "hf": "-", | |
| "open_source": false, | |
| "release_date": "2023-09", | |
| "Overall": 71.16, | |
| "Style": 95.85, | |
| "World Knowledge": 94.36, | |
| "Attribute-Overall": 85.41, | |
| "Quantity": 64.36, | |
| "Expression": 71.11, | |
| "Material": 88.93, | |
| "Size": 90.72, | |
| "Shape": 77.62, | |
| "Color": 91.3, | |
| "Action-Overall": 70.59, | |
| "Hand": 61.22, | |
| "Full body": 65.94, | |
| "Animal": 74.28, | |
| "Non Contact": 67.41, | |
| "Contact": 62.64, | |
| "State": 77.37, | |
| "Relationship-Overall": 80.12, | |
| "Composition": 81.63, | |
| "Similarity": 73.72, | |
| "Inclusion": 85.63, | |
| "Comparison": 77.43, | |
| "Compound-Overall": 75.87, | |
| "Imagination": 80.38, | |
| "Feature matching": 65.89, | |
| "Grammar-Overall": 70.81, | |
| "Pronoun Reference": 80.16, | |
| "Consistency": 74.21, | |
| "Negation": 59.51, | |
| "Layout-Overall": 73.33, | |
| "2D": 70.48, | |
| "3D": 76.99, | |
| "Logical Reasoning": 61.41, | |
| "Text": 3.8 | |
| }, | |
| { | |
| "model": "Qwen-Image", | |
| "link": "https://arxiv.org/pdf/2508.02324", | |
| "hf": "https://huggingface.co/Qwen/Qwen-Image", | |
| "open_source": true, | |
| "release_date": "2025-08", | |
| "Overall": 86.91, | |
| "Style": 97.84, | |
| "World Knowledge": 95.66, | |
| "Attribute-Overall": 95.04, | |
| "Quantity": 89.36, | |
| "Expression": 91.11, | |
| "Material": 96.23, | |
| "Size": 93.56, | |
| "Shape": 90.91, | |
| "Color": 97.9, | |
| "Action-Overall": 86.56, | |
| "Hand": 83.33, | |
| "Full body": 90.62, | |
| "Animal": 89.86, | |
| "Non Contact": 86.61, | |
| "Contact": 79.6, | |
| "State": 87.75, | |
| "Relationship-Overall": 87.61, | |
| "Composition": 85.59, | |
| "Similarity": 84.29, | |
| "Inclusion": 91.67, | |
| "Comparison": 90.53, | |
| "Compound-Overall": 82.99, | |
| "Imagination": 83.44, | |
| "Feature matching": 82.01, | |
| "Grammar-Overall": 76.9, | |
| "Pronoun Reference": 94.05, | |
| "Consistency": 83.73, | |
| "Negation": 55.63, | |
| "Layout-Overall": 90.48, | |
| "2D": 92.09, | |
| "3D": 88.41, | |
| "Logical Reasoning": 69.9, | |
| "Text": 86.14 | |
| }, | |
| { | |
| "model": "Imagen-4.0-generate-preview-06-06", | |
| "link": "https://deepmind.google/models/imagen/", | |
| "hf": "-", | |
| "open_source": false, | |
| "release_date": "2025-01", | |
| "Overall": 79.9, | |
| "Style": 95.6, | |
| "World Knowledge": 97.98, | |
| "Attribute-Overall": 90.94, | |
| "Quantity": 82.45, | |
| "Expression": 80.42, | |
| "Material": 92.24, | |
| "Size": 91.29, | |
| "Shape": 85.84, | |
| "Color": 96.28, | |
| "Action-Overall": 84.55, | |
| "Hand": 81.09, | |
| "Full body": 84.69, | |
| "Animal": 82.25, | |
| "Non Contact": 83.48, | |
| "Contact": 85.63, | |
| "State": 86.07, | |
| "Relationship-Overall": 88.04, | |
| "Composition": 87.24, | |
| "Similarity": 82.05, | |
| "Inclusion": 93.97, | |
| "Comparison": 89.08, | |
| "Compound-Overall": 86.63, | |
| "Imagination": 88.71, | |
| "Feature matching": 82.01, | |
| "Grammar-Overall": 82.74, | |
| "Pronoun Reference": 92.06, | |
| "Consistency": 81.75, | |
| "Negation": 75.35, | |
| "Layout-Overall": 90.48, | |
| "2D": 90.25, | |
| "3D": 90.76, | |
| "Logical Reasoning": 77.18, | |
| "Text": 4.89 | |
| }, | |
| { | |
| "model": "Recraft", | |
| "link": "https://www.recraft.ai/docs#generate-image", | |
| "hf": "-", | |
| "open_source": false, | |
| "release_date": "2024-12", | |
| "Overall": 56.9, | |
| "Style": 86.38, | |
| "World Knowledge": 85.55, | |
| "Attribute-Overall": 74.31, | |
| "Quantity": 61.7, | |
| "Expression": 60.56, | |
| "Material": 73.72, | |
| "Size": 79.92, | |
| "Shape": 65.03, | |
| "Color": 82.39, | |
| "Action-Overall": 54.65, | |
| "Hand": 44.23, | |
| "Full body": 57.81, | |
| "Animal": 60.87, | |
| "Non Contact": 42.86, | |
| "Contact": 43.39, | |
| "State": 61.66, | |
| "Relationship-Overall": 57.44, | |
| "Composition": 54.72, | |
| "Similarity": 49.68, | |
| "Inclusion": 63.22, | |
| "Comparison": 63.59, | |
| "Compound-Overall": 50.0, | |
| "Imagination": 50.95, | |
| "Feature matching": 47.9, | |
| "Grammar-Overall": 57.49, | |
| "Pronoun Reference": 71.83, | |
| "Consistency": 55.95, | |
| "Negation": 46.13, | |
| "Layout-Overall": 64.52, | |
| "2D": 64.12, | |
| "3D": 65.04, | |
| "Logical Reasoning": 36.17, | |
| "Text": 2.45 | |
| }, | |
| { | |
| "model": "Nano Banana", | |
| "link": "https://ainanobanana.io/", | |
| "hf": "-", | |
| "open_source": false, | |
| "release_date": "2025-08", | |
| "Overall": 83.17, | |
| "Style": 98.41, | |
| "World Knowledge": 97.38, | |
| "Attribute-Overall": 93.29, | |
| "Quantity": 90.37, | |
| "Expression": 85.06, | |
| "Material": 93.11, | |
| "Size": 94.29, | |
| "Shape": 87.99, | |
| "Color": 98.1, | |
| "Action-Overall": 85.55, | |
| "Hand": 84.42, | |
| "Full body": 88.09, | |
| "Animal": 84.06, | |
| "Non Contact": 87.05, | |
| "Contact": 82.9, | |
| "State": 86.07, | |
| "Relationship-Overall": 91.32, | |
| "Composition": 90.59, | |
| "Similarity": 86.5, | |
| "Inclusion": 96.83, | |
| "Comparison": 91.71, | |
| "Compound-Overall": 91.21, | |
| "Imagination": 92.14, | |
| "Feature matching": 89.13, | |
| "Grammar-Overall": 88.35, | |
| "Pronoun Reference": 94.78, | |
| "Consistency": 88.1, | |
| "Negation": 82.86, | |
| "Layout-Overall": 93.15, | |
| "2D": 93.19, | |
| "3D": 93.1, | |
| "Logical Reasoning": 82.4, | |
| "Text": 10.68 | |
| }, | |
| { | |
| "model": "GPT-4o", | |
| "link": "https://platform.openai.com/docs/guides/image-generation", | |
| "hf": "-", | |
| "open_source": false, | |
| "release_date": "2025-03", | |
| "Overall": 90.51, | |
| "Style": 99.41, | |
| "World Knowledge": 97.96, | |
| "Attribute-Overall": 94.72, | |
| "Quantity": 85.87, | |
| "Expression": 92.56, | |
| "Material": 94.43, | |
| "Size": 95.23, | |
| "Shape": 94.23, | |
| "Color": 96.59, | |
| "Action-Overall": 89.33, | |
| "Hand": 91.12, | |
| "Full body": 92.5, | |
| "Animal": 89.49, | |
| "Non Contact": 91.52, | |
| "Contact": 86.78, | |
| "State": 88.14, | |
| "Relationship-Overall": 92.59, | |
| "Composition": 91.93, | |
| "Similarity": 89.1, | |
| "Inclusion": 95.64, | |
| "Comparison": 93.93, | |
| "Compound-Overall": 94.59, | |
| "Imagination": 95.36, | |
| "Feature matching": 92.87, | |
| "Grammar-Overall": 94.11, | |
| "Pronoun Reference": 96.37, | |
| "Consistency": 92.86, | |
| "Negation": 93.24, | |
| "Layout-Overall": 95.21, | |
| "2D": 95.01, | |
| "3D": 95.47, | |
| "Logical Reasoning": 90.05, | |
| "Text": 57.14 | |
| }, | |
| { | |
| "model": "FLUX-kontext-max", | |
| "link": "https://bfl.ai/models/flux-kontext", | |
| "hf": "-", | |
| "open_source": false, | |
| "release_date": "2025-05", | |
| "Overall": 75.24, | |
| "Style": 97.59, | |
| "World Knowledge": 92.31, | |
| "Attribute-Overall": 86.17, | |
| "Quantity": 72.34, | |
| "Expression": 71.41, | |
| "Material": 87.48, | |
| "Size": 88.83, | |
| "Shape": 81.64, | |
| "Color": 92.8, | |
| "Action-Overall": 75.71, | |
| "Hand": 76.28, | |
| "Full body": 70.22, | |
| "Animal": 79.35, | |
| "Non Contact": 69.2, | |
| "Contact": 74.43, | |
| "State": 78.16, | |
| "Relationship-Overall": 81.27, | |
| "Composition": 78.95, | |
| "Similarity": 73.4, | |
| "Inclusion": 87.25, | |
| "Comparison": 86.65, | |
| "Compound-Overall": 80.16, | |
| "Imagination": 84.6, | |
| "Feature matching": 70.33, | |
| "Grammar-Overall": 78.77, | |
| "Pronoun Reference": 88.76, | |
| "Consistency": 76.19, | |
| "Negation": 72.24, | |
| "Layout-Overall": 87.58, | |
| "2D": 87.01, | |
| "3D": 88.32, | |
| "Logical Reasoning": 68.2, | |
| "Text": 4.62 | |
| }, | |
| { | |
| "model": "Hunyuan-DiT", | |
| "link": "https://arxiv.org/pdf/2405.08748", | |
| "hf": "https://huggingface.co/Tencent-Hunyuan/HunyuanDiT", | |
| "open_source": true, | |
| "release_date": "2024-05", | |
| "Overall": 55.57, | |
| "Style": 94.1, | |
| "World Knowledge": 76.16, | |
| "Attribute-Overall": 69.72, | |
| "Quantity": 66.49, | |
| "Expression": 54.03, | |
| "Material": 71.76, | |
| "Size": 76.14, | |
| "Shape": 58.57, | |
| "Color": 76.1, | |
| "Action-Overall": 51.04, | |
| "Hand": 41.03, | |
| "Full body": 51.56, | |
| "Animal": 57.25, | |
| "Non Contact": 41.52, | |
| "Contact": 37.36, | |
| "State": 59.09, | |
| "Relationship-Overall": 55.6, | |
| "Composition": 59.69, | |
| "Similarity": 48.08, | |
| "Inclusion": 56.9, | |
| "Comparison": 52.43, | |
| "Compound-Overall": 52.03, | |
| "Imagination": 57.49, | |
| "Feature matching": 39.95, | |
| "Grammar-Overall": 60.03, | |
| "Pronoun Reference": 63.49, | |
| "Consistency": 60.71, | |
| "Negation": 56.34, | |
| "Layout-Overall": 61.67, | |
| "2D": 60.73, | |
| "3D": 62.86, | |
| "Logical Reasoning": 33.98, | |
| "Text": 1.36 | |
| }, | |
| { | |
| "model": "Imagen-3.0-generate-002", | |
| "link": "https://arxiv.org/pdf/2408.07009", | |
| "hf": "-", | |
| "open_source": false, | |
| "release_date": "2025-02", | |
| "Overall": 0.98, | |
| "Style": 2.66, | |
| "World Knowledge": 0.29, | |
| "Attribute-Overall": 0.69, | |
| "Quantity": 0.0, | |
| "Expression": 0.0, | |
| "Material": 0.0, | |
| "Size": 1.52, | |
| "Shape": 0.35, | |
| "Color": 1.36, | |
| "Action-Overall": 0.6, | |
| "Hand": 0.0, | |
| "Full body": 0.31, | |
| "Animal": 0.0, | |
| "Non Contact": 0.0, | |
| "Contact": 0.0, | |
| "State": 1.38, | |
| "Relationship-Overall": 0.32, | |
| "Composition": 0.13, | |
| "Similarity": 0.64, | |
| "Inclusion": 0.0, | |
| "Comparison": 0.73, | |
| "Compound-Overall": 0.0, | |
| "Imagination": 0.0, | |
| "Feature matching": 0.0, | |
| "Grammar-Overall": 4.06, | |
| "Pronoun Reference": 0.79, | |
| "Consistency": 1.19, | |
| "Negation": 9.51, | |
| "Layout-Overall": 0.87, | |
| "2D": 1.55, | |
| "3D": 0.0, | |
| "Logical Reasoning": 0.0, | |
| "Text": 0.27 | |
| }, | |
| { | |
| "model": "wan2.2-t2i-plus", | |
| "link": "https://help.aliyun.com/zh/model-studio/text-to-image-v2-api-reference", | |
| "hf": "-", | |
| "open_source": false, | |
| "release_date": "2025-07", | |
| "Overall": 70.05, | |
| "Style": 91.61, | |
| "World Knowledge": 88.73, | |
| "Attribute-Overall": 82.42, | |
| "Quantity": 78.19, | |
| "Expression": 66.94, | |
| "Material": 82.15, | |
| "Size": 84.09, | |
| "Shape": 77.1, | |
| "Color": 89.99, | |
| "Action-Overall": 70.22, | |
| "Hand": 67.95, | |
| "Full body": 69.06, | |
| "Animal": 72.46, | |
| "Non Contact": 64.29, | |
| "Contact": 63.79, | |
| "State": 74.21, | |
| "Relationship-Overall": 73.65, | |
| "Composition": 70.15, | |
| "Similarity": 70.83, | |
| "Inclusion": 80.17, | |
| "Comparison": 76.94, | |
| "Compound-Overall": 71.51, | |
| "Imagination": 74.26, | |
| "Feature matching": 65.42, | |
| "Grammar-Overall": 70.05, | |
| "Pronoun Reference": 83.73, | |
| "Consistency": 62.7, | |
| "Negation": 64.44, | |
| "Layout-Overall": 80.08, | |
| "2D": 81.5, | |
| "3D": 78.26, | |
| "Logical Reasoning": 57.04, | |
| "Text": 15.22 | |
| }, | |
| { | |
| "model": "Imagen-4.0-Fast-preview-06-06", | |
| "link": "https://deepmind.google/models/imagen/", | |
| "hf": "-", | |
| "open_source": false, | |
| "release_date": "2025-01", | |
| "Overall": 1.2, | |
| "Style": 2.91, | |
| "World Knowledge": 0.0, | |
| "Attribute-Overall": 0.93, | |
| "Quantity": 0.0, | |
| "Expression": 2.08, | |
| "Material": 0.53, | |
| "Size": 0.0, | |
| "Shape": 1.22, | |
| "Color": 1.05, | |
| "Action-Overall": 0.44, | |
| "Hand": 0.32, | |
| "Full body": 0.0, | |
| "Animal": 0.0, | |
| "Non Contact": 0.0, | |
| "Contact": 0.0, | |
| "State": 0.99, | |
| "Relationship-Overall": 0.7, | |
| "Composition": 0.77, | |
| "Similarity": 0.96, | |
| "Inclusion": 0.57, | |
| "Comparison": 0.49, | |
| "Compound-Overall": 0.07, | |
| "Imagination": 0.11, | |
| "Feature matching": 0.0, | |
| "Grammar-Overall": 4.95, | |
| "Pronoun Reference": 0.79, | |
| "Consistency": 0.4, | |
| "Negation": 12.68, | |
| "Layout-Overall": 1.51, | |
| "2D": 2.54, | |
| "3D": 0.18, | |
| "Logical Reasoning": 0.24, | |
| "Text": 0.27 | |
| }, | |
| { | |
| "model": "Seedream-3.0", | |
| "link": "https://www.byteplus.com/en/product/Seedream", | |
| "hf": "-", | |
| "open_source": false, | |
| "release_date": "2025-06", | |
| "Overall": 86.14, | |
| "Style": 98.42, | |
| "World Knowledge": 95.36, | |
| "Attribute-Overall": 93.93, | |
| "Quantity": 85.64, | |
| "Expression": 83.98, | |
| "Material": 96.39, | |
| "Size": 90.53, | |
| "Shape": 93.36, | |
| "Color": 97.9, | |
| "Action-Overall": 84.53, | |
| "Hand": 81.41, | |
| "Full body": 89.06, | |
| "Animal": 86.13, | |
| "Non Contact": 85.71, | |
| "Contact": 79.19, | |
| "State": 85.18, | |
| "Relationship-Overall": 87.55, | |
| "Composition": 84.57, | |
| "Similarity": 83.01, | |
| "Inclusion": 93.1, | |
| "Comparison": 91.99, | |
| "Compound-Overall": 83.11, | |
| "Imagination": 83.83, | |
| "Feature matching": 81.54, | |
| "Grammar-Overall": 77.54, | |
| "Pronoun Reference": 88.89, | |
| "Consistency": 82.14, | |
| "Negation": 63.38, | |
| "Layout-Overall": 90.16, | |
| "2D": 90.68, | |
| "3D": 89.49, | |
| "Logical Reasoning": 68.45, | |
| "Text": 82.34 | |
| }, | |
| { | |
| "model": "Imagen-4.0-Ultra-preview-06-06", | |
| "link": "https://deepmind.google/models/imagen/", | |
| "hf": "-", | |
| "open_source": false, | |
| "release_date": "2025-06", | |
| "Overall": 83.86, | |
| "Style": 97.34, | |
| "World Knowledge": 97.4, | |
| "Attribute-Overall": 93.59, | |
| "Quantity": 88.3, | |
| "Expression": 83.75, | |
| "Material": 94.13, | |
| "Size": 95.27, | |
| "Shape": 90.91, | |
| "Color": 97.8, | |
| "Action-Overall": 88.8, | |
| "Hand": 83.97, | |
| "Full body": 90.94, | |
| "Animal": 88.41, | |
| "Non Contact": 87.5, | |
| "Contact": 88.79, | |
| "State": 90.02, | |
| "Relationship-Overall": 92.35, | |
| "Composition": 92.22, | |
| "Similarity": 87.82, | |
| "Inclusion": 96.84, | |
| "Comparison": 92.23, | |
| "Compound-Overall": 92.51, | |
| "Imagination": 93.99, | |
| "Feature matching": 89.25, | |
| "Grammar-Overall": 88.83, | |
| "Pronoun Reference": 96.83, | |
| "Consistency": 90.08, | |
| "Negation": 80.63, | |
| "Layout-Overall": 94.13, | |
| "2D": 94.77, | |
| "3D": 93.3, | |
| "Logical Reasoning": 86.89, | |
| "Text": 6.79 | |
| } | |
| ] | |
| } |