akhaliq (HF Staff) committed on
Commit 663212e · verified · Parent(s): bca471a

Update app.py

Files changed (1):
  app.py  +191  -331

app.py CHANGED
@@ -3,105 +3,107 @@ import gradio as gr
 import torch
 import numpy as np
 import random
 from diffusers import AutoencoderKL, FlowMatchEulerDiscreteScheduler
 from transformers import AutoTokenizer, Qwen3ForCausalLM
 from controlnet_aux.processor import Processor
 from PIL import Image

-# Try to import ControlNet components, fall back to basic pipeline if unavailable
 try:
-    from videox_fun.pipeline import ZImageControlPipeline
-    from videox_fun.models import ZImageControlTransformer2DModel
-    CONTROLNET_AVAILABLE = True
 except ImportError:
-    from diffusers import ZImagePipeline
-    CONTROLNET_AVAILABLE = False
-    print("ControlNet components not available. Running in basic mode.")

 MAX_SEED = np.iinfo(np.int32).max
 MAX_IMAGE_SIZE = 1280

-# Configuration
-MODEL_REPO = "Tongyi-MAI/Z-Image-Turbo"
-CONTROLNET_WEIGHTS = "Z-Image-Turbo-Fun-Controlnet-Union.safetensors"  # Optional local path

 print("Loading Z-Image Turbo model...")
-print("This may take a few minutes on first run...")
-
 device = "cuda" if torch.cuda.is_available() else "cpu"
 weight_dtype = torch.bfloat16

-# Load models
-if CONTROLNET_AVAILABLE:
-    print("Loading with ControlNet support...")
-
-    # Load transformer with control layers
-    transformer = ZImageControlTransformer2DModel.from_pretrained(
-        MODEL_REPO,
-        subfolder="transformer",
-        transformer_additional_kwargs={
-            "control_layers_places": [0, 5, 10, 15, 20, 25],
-            "control_in_dim": 16
-        },
-    ).to(device, weight_dtype)
-
-    # Optionally load ControlNet weights if available
     try:
-        from safetensors.torch import load_file
-        import os
-        if os.path.exists(CONTROLNET_WEIGHTS):
-            print(f"Loading ControlNet weights from {CONTROLNET_WEIGHTS}")
-            state_dict = load_file(CONTROLNET_WEIGHTS)
-            state_dict = state_dict.get("state_dict", state_dict)
-            m, u = transformer.load_state_dict(state_dict, strict=False)
-            print(f"Loaded ControlNet: {len(m)} missing keys, {len(u)} unexpected keys")
     except Exception as e:
-        print(f"Could not load ControlNet weights: {e}")
-
-    # Load other components
-    vae = AutoencoderKL.from_pretrained(
-        MODEL_REPO,
-        subfolder="vae",
-    ).to(device, weight_dtype)
-
-    tokenizer = AutoTokenizer.from_pretrained(
-        MODEL_REPO,
-        subfolder="tokenizer"
-    )
-
-    text_encoder = Qwen3ForCausalLM.from_pretrained(
-        MODEL_REPO,
-        subfolder="text_encoder",
-        torch_dtype=weight_dtype,
-    ).to(device)
-
-    scheduler = FlowMatchEulerDiscreteScheduler.from_pretrained(
-        MODEL_REPO,
-        subfolder="scheduler"
-    )
-
-    pipe = ZImageControlPipeline(
-        vae=vae,
-        tokenizer=tokenizer,
-        text_encoder=text_encoder,
-        transformer=transformer,
-        scheduler=scheduler,
-    )
-    pipe.to(device, weight_dtype)
-
 else:
-    print("Loading basic Z-Image Turbo (no ControlNet)...")
-    pipe = ZImagePipeline.from_pretrained(
-        MODEL_REPO,
-        torch_dtype=weight_dtype,
-        low_cpu_mem_usage=False,
-    )
-    pipe.to(device)

 print(f"Model loaded successfully on {device}!")

 def rescale_image(image, scale, divisible_by=16):
     """Rescale image and ensure dimensions are divisible by specified value."""
     width, height = image.size
     new_width = int(width * scale)
     new_height = int(height * scale)
@@ -150,43 +152,36 @@ def generate_image(
     guidance_scale=1.0,
     seed=42,
     randomize_seed=True,
     progress=gr.Progress(track_tqdm=True)
 ):
-    """Generate image with optional ControlNet guidance."""

     if not prompt.strip():
         raise gr.Error("Please enter a prompt to generate an image.")

-    # Set seed
     if randomize_seed:
         seed = random.randint(0, MAX_SEED)
     generator = torch.Generator(device).manual_seed(seed)

-    # Basic generation (no control image)
-    if input_image is None or not CONTROLNET_AVAILABLE:
-        if input_image is not None and not CONTROLNET_AVAILABLE:
-            gr.Warning("ControlNet not available. Generating without control image.")
-
-        progress(0.1, desc="Generating image...")
-
-        result = pipe(
-            prompt=prompt,
-            negative_prompt=negative_prompt if negative_prompt else None,
-            height=1024,
-            width=1024,
-            num_inference_steps=num_inference_steps,
-            guidance_scale=0.0 if not CONTROLNET_AVAILABLE else guidance_scale,
-            generator=generator,
-        )
-
-        image = result.images[0]
-        progress(1.0, desc="Complete!")
-        return image, seed, None
-
-    # ControlNet generation
-    progress(0.1, desc="Processing control image...")

-    # Map control mode to processor
     processor_map = {
         'Canny': 'canny',
         'HED': 'softedge_hed',
@@ -194,49 +189,56 @@ def generate_image(
         'MLSD': 'mlsd',
         'Pose': 'openpose_full'
     }
-
     processor_id = processor_map.get(control_mode, 'canny')
-    processor = Processor(processor_id)

-    # Process control image
-    control_image, width, height = rescale_image(input_image, image_scale, 16)
-    control_image_1024 = control_image.resize((1024, 1024))
-
-    progress(0.3, desc=f"Applying {control_mode} detection...")
-    control_image_processed = processor(control_image_1024, to_pil=True)
-    control_image_processed = control_image_processed.resize((width, height))
-
-    # Convert to latent
-    progress(0.5, desc="Converting to latent space...")
-    control_image_torch = get_image_latent(
-        control_image_processed,
         sample_size=[height, width]
     )[:, :, 0]

-    # Generate with control
-    progress(0.6, desc="Generating controlled image...")

     try:
         result = pipe(
-            prompt=prompt,
-            negative_prompt=negative_prompt if negative_prompt else None,
             height=height,
             width=width,
             generator=generator,
             guidance_scale=guidance_scale,
-            control_image=control_image_torch,
             num_inference_steps=num_inference_steps,
             control_context_scale=control_context_scale,
         )

         image = result.images[0]
         progress(1.0, desc="Complete!")
-        return image, seed, control_image_processed

     except Exception as e:
         raise gr.Error(f"Generation failed: {str(e)}")

-# Apple-style CSS
 apple_css = """
 .gradio-container {
     max-width: 1200px !important;
@@ -244,269 +246,127 @@ apple_css = """
     padding: 48px 20px !important;
     font-family: -apple-system, BlinkMacSystemFont, 'Inter', 'Segoe UI', sans-serif !important;
 }
-
-.header-container {
-    text-align: center;
-    margin-bottom: 48px;
-}
-
 .main-title {
-    font-size: 56px !important;
-    font-weight: 600 !important;
-    letter-spacing: -0.02em !important;
-    color: #1d1d1f !important;
     margin: 0 0 12px 0 !important;
 }
-
 .subtitle {
-    font-size: 21px !important;
-    color: #6e6e73 !important;
     margin: 0 0 24px 0 !important;
 }
-
 .info-badge {
-    display: inline-block;
-    background: #0071e3;
-    color: white;
-    padding: 6px 16px;
-    border-radius: 20px;
-    font-size: 14px;
-    font-weight: 500;
-    margin-bottom: 16px;
 }
-
 textarea {
-    font-size: 17px !important;
-    border-radius: 12px !important;
-    border: 1px solid #d2d2d7 !important;
-    padding: 12px 16px !important;
 }
-
 textarea:focus {
-    border-color: #0071e3 !important;
-    box-shadow: 0 0 0 4px rgba(0, 113, 227, 0.15) !important;
     outline: none !important;
 }
-
 button.primary {
-    font-size: 17px !important;
-    padding: 12px 32px !important;
-    border-radius: 980px !important;
-    background: #0071e3 !important;
-    border: none !important;
-    color: #ffffff !important;
     transition: all 0.2s ease !important;
 }
-
 button.primary:hover {
-    background: #0077ed !important;
-    transform: scale(1.02) !important;
 }
-
 .footer-text {
-    text-align: center;
-    margin-top: 48px;
-    font-size: 14px !important;
     color: #86868b !important;
 }
-
-@media (max-width: 768px) {
-    .main-title { font-size: 40px !important; }
-    .subtitle { font-size: 19px !important; }
-}
 """

-# Create interface
-with gr.Blocks(title="Z-Image Turbo with ControlNet") as demo:

-    # Header
-    gr.HTML(f"""
     <div class="header-container">
-        <div class="info-badge">{'✓ ControlNet Enabled' if CONTROLNET_AVAILABLE else '⚠ Basic Mode'}</div>
         <h1 class="main-title">Z-Image Turbo</h1>
-        <p class="subtitle">Transform your ideas into stunning visuals with AI-powered control</p>
     </div>
     """)

     with gr.Row():
-        # Left column - Inputs
         with gr.Column(scale=1):
             prompt = gr.Textbox(
                 label="Prompt",
                 placeholder="Describe the image you want to create...",
-                lines=3,
-                max_lines=6,
             )

             negative_prompt = gr.Textbox(
                 label="Negative Prompt",
-                placeholder="What to avoid in the image...",
                 value="blurry, ugly, bad quality",
-                lines=2,
             )

-            if CONTROLNET_AVAILABLE:
-                input_image = gr.Image(
-                    label="Control Image (Optional)",
-                    type="pil",
-                    sources=['upload', 'clipboard'],
-                    height=290,
-                )
-
-                control_mode = gr.Radio(
-                    choices=["Canny", "Depth", "HED", "MLSD", "Pose"],
-                    value="Canny",
-                    label="Control Mode",
-                    info="Choose edge/depth/pose detection method"
-                )

             with gr.Accordion("Advanced Settings", open=False):
-                num_inference_steps = gr.Slider(
-                    label="Inference Steps",
-                    minimum=1,
-                    maximum=30,
-                    step=1,
-                    value=9,
-                    info="More steps = higher quality but slower"
-                )

-                guidance_scale = gr.Slider(
-                    label="Guidance Scale",
-                    minimum=0.0,
-                    maximum=10.0,
-                    step=0.1,
-                    value=1.0,
-                    info="How closely to follow the prompt"
-                )

-                if CONTROLNET_AVAILABLE:
-                    control_context_scale = gr.Slider(
-                        label="Control Strength",
-                        minimum=0.0,
-                        maximum=1.0,
-                        step=0.01,
-                        value=0.75,
-                        info="0.65-0.80 recommended for best results"
-                    )
-
-                    image_scale = gr.Slider(
-                        label="Image Scale",
-                        minimum=0.5,
-                        maximum=2.0,
-                        step=0.1,
-                        value=1.0,
-                        info="Resize control image"
-                    )
-
-                seed = gr.Slider(
-                    label="Seed",
-                    minimum=0,
-                    maximum=MAX_SEED,
-                    step=1,
-                    value=42,
-                )
-
-                randomize_seed = gr.Checkbox(
-                    label="Randomize Seed",
-                    value=True
-                )
-
-            generate_btn = gr.Button(
-                "Generate Image",
-                variant="primary",
-                size="lg",
-                elem_classes="primary"
-            )
-
-        # Right column - Outputs
         with gr.Column(scale=1):
-            output_image = gr.Image(
-                label="Generated Image",
-                type="pil",
-                show_label=True,
-            )
-
-            seed_output = gr.Number(
-                label="Used Seed",
-                precision=0,
-            )

-            if CONTROLNET_AVAILABLE:
-                with gr.Accordion("Preprocessor Output", open=False):
-                    control_output = gr.Image(
-                        label="Processed Control Image",
-                        type="pil",
-                    )
-
     # Footer
     gr.HTML("""
     <div class="footer-text">
-        <p style="margin-bottom: 8px;">Powered by Z-Image Turbo from Tongyi-MAI</p>
-        <p style="font-size: 13px;">
-            <a href="https://huggingface.co/Tongyi-MAI/Z-Image-Turbo" style="color: #0071e3; text-decoration: none; margin: 0 8px;">
-                Model Card
-            </a> •
-            <a href="https://huggingface.co/alibaba-pai/Z-Image-Turbo-Fun-Controlnet-Union" style="color: #0071e3; text-decoration: none; margin: 0 8px;">
-                ControlNet
-            </a> •
-            <a href="https://github.com/aigc-apps/VideoX-Fun" style="color: #0071e3; text-decoration: none; margin: 0 8px;">
-                GitHub
-            </a>
-        </p>
     </div>
     """)
-
-    # Event handlers
-    generate_inputs = [
-        prompt,
-        negative_prompt,
-    ]
-
-    if CONTROLNET_AVAILABLE:
-        generate_inputs.extend([
-            input_image,
-            control_mode,
-            control_context_scale,
-            image_scale,
-        ])
-        generate_inputs.extend([
-            num_inference_steps,
-            guidance_scale,
-            seed,
-            randomize_seed,
-        ])
-        generate_outputs = [output_image, seed_output, control_output]
-    else:
-        # Add None placeholders for missing ControlNet params
-        generate_inputs.extend([
-            gr.State(None),     # input_image
-            gr.State("Canny"),  # control_mode
-            gr.State(0.75),     # control_context_scale
-            gr.State(1.0),      # image_scale
-        ])
-        generate_inputs.extend([
-            num_inference_steps,
-            guidance_scale,
-            seed,
-            randomize_seed,
-        ])
-        generate_outputs = [output_image, seed_output, gr.State(None)]
-
     generate_btn.click(
         fn=generate_image,
-        inputs=generate_inputs,
-        outputs=generate_outputs,
-    )
-
-    prompt.submit(
-        fn=generate_image,
-        inputs=generate_inputs,
-        outputs=generate_outputs,
     )

 if __name__ == "__main__":
-    demo.launch(
-        share=False,
-        show_error=True,
-        css=apple_css,
-    )
 import torch
 import numpy as np
 import random
+import time
+import os
 from diffusers import AutoencoderKL, FlowMatchEulerDiscreteScheduler
 from transformers import AutoTokenizer, Qwen3ForCausalLM
 from controlnet_aux.processor import Processor
 from PIL import Image
+from safetensors.torch import load_file

+# Import pipeline and model
+# Ensure videox_fun is in your python path
+from videox_fun.pipeline import ZImageControlPipeline
+from videox_fun.models import ZImageControlTransformer2DModel
+
+# Try to import prompt utility, define fallback if missing
 try:
+    from utils.prompt_utils import polish_prompt
 except ImportError:
+    print("utils.prompt_utils not found. Using passthrough for prompt polishing.")
+    def polish_prompt(prompt):
+        return prompt

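utils.prompt_utils is not part of this diff, so nothing beyond the passthrough fallback is known about the real polish_prompt. As a minimal sketch of what an LLM-backed polisher could look like, assuming a small chat-tuned model loaded via transformers (the model ID, system prompt, and helper name below are illustrative, not the repo's implementation):

from transformers import AutoModelForCausalLM

_polish_tok = AutoTokenizer.from_pretrained("Qwen/Qwen3-0.6B")  # illustrative model choice
_polish_model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen3-0.6B", torch_dtype="auto")

def polish_prompt_sketch(prompt):
    # Ask the LLM to expand the user's text into a detailed image-generation prompt.
    messages = [
        {"role": "system", "content": "Rewrite the user's text as a rich, detailed image-generation prompt."},
        {"role": "user", "content": prompt},
    ]
    text = _polish_tok.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    inputs = _polish_tok(text, return_tensors="pt").to(_polish_model.device)
    out = _polish_model.generate(**inputs, max_new_tokens=128)
    # Keep only the newly generated tokens.
    return _polish_tok.decode(out[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True)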
+# Configuration
 MAX_SEED = np.iinfo(np.int32).max
 MAX_IMAGE_SIZE = 1280

+# Paths
+MODEL_LOCAL = "models/Z-Image-Turbo/"  # Local path or HuggingFace ID
+# We prioritize the local safetensors file for ControlNet weights
+CONTROLNET_WEIGHTS = "models/Z-Image-Turbo-Fun-Controlnet-Union.safetensors"

 print("Loading Z-Image Turbo model...")
 device = "cuda" if torch.cuda.is_available() else "cpu"
 weight_dtype = torch.bfloat16

+# 1. Load Transformer with Control Config
+print("Initializing Transformer...")
+transformer = ZImageControlTransformer2DModel.from_pretrained(
+    MODEL_LOCAL,
+    subfolder="transformer",
+    transformer_additional_kwargs={
+        "control_layers_places": [0, 5, 10, 15, 20, 25],
+        "control_in_dim": 16
+    },
+).to(device, weight_dtype)
+
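For context on the two transformer_additional_kwargs: control_layers_places appears to list the transformer block indices that receive control-feature injection, and control_in_dim the channel width of the control latent (16, plausibly matching the VAE's latent channels). A toy sketch of that injection pattern, illustrative only and not the actual ZImageControlTransformer2DModel internals:

import torch.nn as nn

class ToyControlInjection(nn.Module):
    # Adds a projected control latent to the hidden states at the selected
    # block indices; all other blocks run unmodified.
    def __init__(self, blocks, control_layers_places, control_in_dim, hidden_dim):
        super().__init__()
        self.blocks = nn.ModuleList(blocks)
        self.places = set(control_layers_places)
        self.proj = nn.Linear(control_in_dim, hidden_dim)

    def forward(self, hidden_states, control_features):
        control = self.proj(control_features)
        for i, block in enumerate(self.blocks):
            if i in self.places:
                hidden_states = hidden_states + control
            hidden_states = block(hidden_states)
        return hidden_states

# e.g. ToyControlInjection([nn.Identity() for _ in range(30)], [0, 5, 10, 15, 20, 25], 16, 2048)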
+# 2. Load ControlNet Weights manually
+if os.path.exists(CONTROLNET_WEIGHTS):
+    print(f"Loading ControlNet weights from {CONTROLNET_WEIGHTS}")
     try:
+        state_dict = load_file(CONTROLNET_WEIGHTS)
+        # Handle potential nesting of state_dict
+        state_dict = state_dict.get("state_dict", state_dict)
+
+        m, u = transformer.load_state_dict(state_dict, strict=False)
+        print(f"ControlNet Weights Loaded - Missing keys: {len(m)}, Unexpected keys: {len(u)}")
     except Exception as e:
+        print(f"Error loading ControlNet weights: {e}")
 else:
+    print(f"Warning: ControlNet weights not found at {CONTROLNET_WEIGHTS}. Trying to run without them or using base weights.")
+
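Because the merge above uses strict=False, a silent partial load is possible. A quick sanity check, assuming (as is common for such variants) that the control modules carry "control" in their parameter names:

# Optional sanity check: confirm control-branch parameters exist on the transformer
control_params = [n for n, _ in transformer.named_parameters() if "control" in n.lower()]
print(f"Found {len(control_params)} control-related parameters, e.g. {control_params[:3]}")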
+# 3. Load VAE, Tokenizer, Encoder, Scheduler
+print("Loading core components...")
+vae = AutoencoderKL.from_pretrained(
+    MODEL_LOCAL,
+    subfolder="vae",
+).to(device, weight_dtype)
+
+tokenizer = AutoTokenizer.from_pretrained(
+    MODEL_LOCAL,
+    subfolder="tokenizer"
+)
+
+text_encoder = Qwen3ForCausalLM.from_pretrained(
+    MODEL_LOCAL,
+    subfolder="text_encoder",
+    torch_dtype=weight_dtype,
+).to(device)

+scheduler = FlowMatchEulerDiscreteScheduler.from_pretrained(
+    MODEL_LOCAL,
+    subfolder="scheduler"
+)
+
+# 4. Assemble Pipeline
+pipe = ZImageControlPipeline(
+    vae=vae,
+    tokenizer=tokenizer,
+    text_encoder=text_encoder,
+    transformer=transformer,
+    scheduler=scheduler,
+)
+pipe.to(device, weight_dtype)
 print(f"Model loaded successfully on {device}!")
100
+ # --- Helper Functions ---
101
+
102
  def rescale_image(image, scale, divisible_by=16):
103
  """Rescale image and ensure dimensions are divisible by specified value."""
104
+ if image is None:
105
+ return None, 1024, 1024
106
+
107
  width, height = image.size
108
  new_width = int(width * scale)
109
  new_height = int(height * scale)
 
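The rest of rescale_image and the get_image_latent helper used below live in an unchanged stretch of app.py that the diff elides. Since the call site indexes the result as [:, :, 0], the helper presumably returns VAE latents with a singleton frame axis; a rough sketch under that assumption (not the repo's code), reusing the script's vae, device, and weight_dtype globals:

def get_image_latent_sketch(image, sample_size):
    # Hypothetical stand-in: resize to (H, W), normalize to [-1, 1],
    # VAE-encode, and return latents shaped [B, C, 1, h, w].
    image = image.convert("RGB").resize((sample_size[1], sample_size[0]))
    pixels = torch.from_numpy(np.array(image)).float() / 127.5 - 1.0
    pixels = pixels.permute(2, 0, 1).unsqueeze(0).to(device, weight_dtype)
    with torch.no_grad():
        latent = vae.encode(pixels).latent_dist.sample()
    return latent.unsqueeze(2)  # singleton frame axis, matching the [:, :, 0] call site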
     guidance_scale=1.0,
     seed=42,
     randomize_seed=True,
+    is_polish_prompt=True,
     progress=gr.Progress(track_tqdm=True)
 ):
+    timestamp = time.time()

     if not prompt.strip():
         raise gr.Error("Please enter a prompt to generate an image.")

+    # 1. Polish Prompt
+    final_prompt = prompt
+    if is_polish_prompt:
+        progress(0.1, desc="Polishing prompt...")
+        try:
+            final_prompt = polish_prompt(prompt)
+        except Exception as e:
+            print(f"Prompt polish failed: {e}")
+            final_prompt = prompt
+
+    # 2. Set Seed
     if randomize_seed:
         seed = random.randint(0, MAX_SEED)
     generator = torch.Generator(device).manual_seed(seed)

+    # 3. Process Control Image
+    if input_image is None:
+        raise gr.Error("Please upload a control image.")
+
+    progress(0.2, desc=f"Processing {control_mode}...")

+    # Map control mode to processor ID
     processor_map = {
         'Canny': 'canny',
         'HED': 'softedge_hed',

         'MLSD': 'mlsd',
         'Pose': 'openpose_full'
     }
     processor_id = processor_map.get(control_mode, 'canny')

+    # Initialize processor
+    try:
+        processor = Processor(processor_id)
+    except Exception as e:
+        print(f"Failed to load processor {processor_id}, falling back to Canny. Error: {e}")
+        processor = Processor('canny')
+
+    # Resize input for processing
+    control_image_rescaled, width, height = rescale_image(input_image, image_scale, 16)
+
+    # Run processor (these detectors typically work best at 1024x1024, so resize in, then back out)
+    temp_image = control_image_rescaled.resize((1024, 1024))
+    processed_image_pil = processor(temp_image, to_pil=True)
+    processed_image_pil = processed_image_pil.resize((width, height))
+
+    # Convert to Latent
+    progress(0.4, desc="Encoding control image...")
+    control_image_latent = get_image_latent(
+        processed_image_pil,
         sample_size=[height, width]
     )[:, :, 0]

+    # 4. Generate
+    progress(0.5, desc="Generating...")

     try:
         result = pipe(
+            prompt=final_prompt,
+            negative_prompt=negative_prompt,
             height=height,
             width=width,
             generator=generator,
             guidance_scale=guidance_scale,
+            control_image=control_image_latent,
             num_inference_steps=num_inference_steps,
             control_context_scale=control_context_scale,
         )

         image = result.images[0]
         progress(1.0, desc="Complete!")
+
+        return image, seed, processed_image_pil, final_prompt

     except Exception as e:
         raise gr.Error(f"Generation failed: {str(e)}")

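The Processor round-trip above follows the standard controlnet_aux usage and can be exercised outside the app; a standalone check using the same processor IDs as processor_map (the input path is hypothetical):

from PIL import Image
from controlnet_aux.processor import Processor

test = Image.open("test_input.jpg")  # hypothetical local test image
for pid in ["canny", "softedge_hed", "mlsd", "openpose_full"]:
    detector = Processor(pid)
    control_map = detector(test.resize((1024, 1024)), to_pil=True)
    control_map.save(f"control_{pid}.png")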
+# --- UI Configuration (Apple Style) ---
+
 apple_css = """
 .gradio-container {
     max-width: 1200px !important;

     padding: 48px 20px !important;
     font-family: -apple-system, BlinkMacSystemFont, 'Inter', 'Segoe UI', sans-serif !important;
 }
+.header-container { text-align: center; margin-bottom: 48px; }
 .main-title {
+    font-size: 56px !important; font-weight: 600 !important;
+    letter-spacing: -0.02em !important; color: #1d1d1f !important;
     margin: 0 0 12px 0 !important;
 }
 .subtitle {
+    font-size: 21px !important; color: #6e6e73 !important;
     margin: 0 0 24px 0 !important;
 }
 .info-badge {
+    display: inline-block; background: #0071e3; color: white;
+    padding: 6px 16px; border-radius: 20px; font-size: 14px;
+    font-weight: 500; margin-bottom: 16px;
 }
 textarea {
+    font-size: 17px !important; border-radius: 12px !important;
+    border: 1px solid #d2d2d7 !important; padding: 12px 16px !important;
 }
 textarea:focus {
+    border-color: #0071e3 !important; box-shadow: 0 0 0 4px rgba(0, 113, 227, 0.15) !important;
     outline: none !important;
 }
 button.primary {
+    font-size: 17px !important; padding: 12px 32px !important;
+    border-radius: 980px !important; background: #0071e3 !important;
+    border: none !important; color: #ffffff !important;
     transition: all 0.2s ease !important;
 }
 button.primary:hover {
+    background: #0077ed !important; transform: scale(1.02) !important;
 }
 .footer-text {
+    text-align: center; margin-top: 48px; font-size: 14px !important;
     color: #86868b !important;
 }
 """

+with gr.Blocks(title="Z-Image Turbo ControlNet", css=apple_css) as demo:

+    gr.HTML("""
     <div class="header-container">
+        <div class="info-badge">✓ ControlNet Union</div>
         <h1 class="main-title">Z-Image Turbo</h1>
+        <p class="subtitle">Multi-Control Generation with LLM Prompt Polishing</p>
     </div>
     """)

     with gr.Row():
+        # Left Input Column
         with gr.Column(scale=1):
             prompt = gr.Textbox(
                 label="Prompt",
                 placeholder="Describe the image you want to create...",
+                lines=3
             )

+            with gr.Row():
+                is_polish_prompt = gr.Checkbox(label="Polish Prompt with LLM", value=True)
+                randomize_seed = gr.Checkbox(label="Randomize Seed", value=True)
+
             negative_prompt = gr.Textbox(
                 label="Negative Prompt",
                 value="blurry, ugly, bad quality",
+                lines=1
             )

+ input_image = gr.Image(
317
+ label="Control Image (Required)",
318
+ type="pil",
319
+ sources=['upload', 'clipboard'],
320
+ height=300
321
+ )
322
+
323
+ control_mode = gr.Radio(
324
+ choices=["Canny", "Depth", "HED", "MLSD", "Pose"],
325
+ value="Canny",
326
+ label="Control Mode",
327
+ info="Select the type of structure to extract"
328
+ )
 
329
 
330
  with gr.Accordion("Advanced Settings", open=False):
331
+ with gr.Row():
332
+ num_inference_steps = gr.Slider(label="Steps", minimum=1, maximum=30, step=1, value=9)
333
+ guidance_scale = gr.Slider(label="Guidance", minimum=0.0, maximum=10.0, step=0.1, value=1.0)
 
 
 
 
 
334
 
335
+ with gr.Row():
336
+ control_context_scale = gr.Slider(label="Control Strength", minimum=0.0, maximum=1.0, step=0.01, value=0.75)
337
+ image_scale = gr.Slider(label="Image Scale", minimum=0.5, maximum=2.0, step=0.1, value=1.0)
 
 
 
 
 
338
 
339
+ seed = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=42)
340
+
341
+ generate_btn = gr.Button("Generate Image", variant="primary", elem_classes="primary")
342
+
+        # Right Output Column
         with gr.Column(scale=1):
+            output_image = gr.Image(label="Generated Image", type="pil")

+            with gr.Accordion("Details & Debug", open=True):
+                polished_prompt_output = gr.Textbox(label="Actual Polished Prompt", interactive=False, lines=2)
+                with gr.Row():
+                    seed_output = gr.Number(label="Seed Used", precision=0)
+                    control_output = gr.Image(label="Preprocessor Output", type="pil")
+
     # Footer
     gr.HTML("""
     <div class="footer-text">
+        Powered by Z-Image Turbo VideoX-Fun • Tongyi-MAI
     </div>
     """)
+
+    # Event Wiring
     generate_btn.click(
         fn=generate_image,
+        inputs=[
+            prompt, negative_prompt, input_image, control_mode,
+            control_context_scale, image_scale, num_inference_steps,
+            guidance_scale, seed, randomize_seed, is_polish_prompt
+        ],
+        outputs=[output_image, seed_output, control_output, polished_prompt_output]
     )

 if __name__ == "__main__":
+    demo.launch(share=False)
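One optional hardening for shared deployments (not part of this commit): queue requests so that long generations are serialized rather than timing out under load; gradio's Blocks.queue supports this:

if __name__ == "__main__":
    demo.queue(max_size=20)  # optional: cap pending requests and serialize GPU work
    demo.launch(share=False)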