jgitsolutions committed on
Commit
16c0cde
·
verified ·
1 Parent(s): 0f8236e

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +51 -342
  2. requirements.txt +7 -8
app.py CHANGED
@@ -1,342 +1,51 @@
1
- # app.py - Complete Color-Corrected Implementation
2
- import os
3
- import cv2
4
- import time
5
- import numpy as np
6
- import gradio as gr
7
- import torch
8
- import torch.nn as nn
9
- import torch.nn.functional as F
10
- from PIL import Image
11
- from functools import partial
12
-
13
- # ====================== COLOR PRESERVATION FUNCTIONS ======================
14
- def preserve_original_colors(original, processed):
15
- """Transfer colors from original to processed image"""
16
- # Convert to LAB color space
17
- original_lab = cv2.cvtColor(original, cv2.COLOR_RGB2LAB)
18
- processed_lab = cv2.cvtColor(processed, cv2.COLOR_RGB2LAB)
19
-
20
- # Replace color information
21
- processed_l, _, _ = cv2.split(processed_lab)
22
- orig_l, orig_a, orig_b = cv2.split(original_lab)
23
-
24
- # Use luminance from processed image but color from original
25
- # Resize original color channels to match processed dimensions
26
- h, w = processed_l.shape[:2]
27
- resized_a = cv2.resize(orig_a, (w, h), interpolation=cv2.INTER_LINEAR)
28
- resized_b = cv2.resize(orig_b, (w, h), interpolation=cv2.INTER_LINEAR)
29
-
30
- # Create color corrected image
31
- color_corrected = cv2.merge([processed_l, resized_a, resized_b])
32
- return cv2.cvtColor(color_corrected, cv2.COLOR_LAB2RGB)
33
-
34
- def fix_color_cast(img):
35
- """Remove color cast from image"""
36
- # For grayscale/black&white images, force true grayscale
37
- gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
38
- gray_rgb = cv2.cvtColor(gray, cv2.COLOR_GRAY2RGB)
39
-
40
- # Detect if image is likely grayscale
41
- diff_r = np.abs(img[:,:,0].astype(np.float32) - gray.astype(np.float32))
42
- diff_g = np.abs(img[:,:,1].astype(np.float32) - gray.astype(np.float32))
43
- diff_b = np.abs(img[:,:,2].astype(np.float32) - gray.astype(np.float32))
44
-
45
- total_diff = (np.mean(diff_r) + np.mean(diff_g) + np.mean(diff_b))/3
46
-
47
- # If grayscale-like, force true grayscale
48
- if total_diff < 10: # Threshold for considering an image grayscale
49
- return gray_rgb
50
-
51
- # Otherwise just correct color balance
52
- b, g, r = cv2.split(img)
53
- r_avg, g_avg, b_avg = np.mean(r), np.mean(g), np.mean(b)
54
-
55
- # Compute grayscale average
56
- gray_avg = np.mean(gray)
57
-
58
- # Adjust channels to balance
59
- r = np.clip(r * (gray_avg / r_avg) if r_avg > 0 else r, 0, 255).astype(np.uint8)
60
- g = np.clip(g * (gray_avg / g_avg) if g_avg > 0 else g, 0, 255).astype(np.uint8)
61
- b = np.clip(b * (gray_avg / b_avg) if b_avg > 0 else b, 0, 255).astype(np.uint8)
62
-
63
- return cv2.merge([b, g, r])
64
-
65
- def simple_edge_enhance(img):
66
- """Enhance edges without color distortion"""
67
- gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
68
- edges = cv2.Canny(gray, 50, 150)
69
- dilated = cv2.dilate(edges, np.ones((2,2), np.uint8))
70
-
71
- # Create edge mask
72
- edge_mask = dilated.astype(np.float32) / 255.0
73
-
74
- # Sharpen image while preserving colors
75
- blurred = cv2.GaussianBlur(img, (0, 0), 3)
76
- sharpened = cv2.addWeighted(img, 1.5, blurred, -0.5, 0)
77
-
78
- # Apply sharpening only to edges
79
- edge_mask = cv2.cvtColor(edge_mask[:,:,np.newaxis], cv2.COLOR_GRAY2RGB)
80
- enhanced = img * (1 - edge_mask) + sharpened * edge_mask
81
-
82
- return enhanced.astype(np.uint8)
83
-
84
- # ====================== MODEL ARCHITECTURE ======================
85
- class SelfAttention(nn.Module):
86
- def __init__(self, channels):
87
- super().__init__()
88
- self.query = nn.Conv2d(channels, channels, 1)
89
- self.key = nn.Conv2d(channels, channels, 1)
90
- self.value = nn.Conv2d(channels, channels, 1)
91
- self.gamma = nn.Parameter(torch.zeros(1))
92
-
93
- def forward(self, x):
94
- batch, c, h, w = x.size()
95
- q = self.query(x).view(batch, c, -1)
96
- k = self.key(x).view(batch, c, -1).permute(0, 2, 1)
97
- v = self.value(x).view(batch, c, -1)
98
-
99
- attention = F.softmax(torch.bmm(q, k) / (c**0.5), dim=2)
100
- out = torch.bmm(attention, v).view(batch, c, h, w)
101
- return self.gamma * out + x
102
-
103
- class ResidualBlock(nn.Module):
104
- def __init__(self, channels):
105
- super().__init__()
106
- self.conv1 = nn.Conv2d(channels, channels, 3, padding=1)
107
- self.conv2 = nn.Conv2d(channels, channels, 3, padding=1)
108
- self.relu = nn.ReLU(inplace=True)
109
-
110
- def forward(self, x):
111
- residual = x
112
- x = self.relu(self.conv1(x))
113
- x = self.conv2(x)
114
- return self.relu(x + residual)
115
-
116
- class UltraEfficientSR(nn.Module):
117
- def __init__(self):
118
- super().__init__()
119
- self.initial = nn.Conv2d(3, 64, 3, padding=1)
120
- self.blocks = nn.Sequential(
121
- ResidualBlock(64),
122
- SelfAttention(64),
123
- ResidualBlock(64)
124
- )
125
- self.upconv1 = nn.Conv2d(64, 256, 3, padding=1)
126
- self.upconv2 = nn.Conv2d(64, 256, 3, padding=1)
127
- self.pixel_shuffle = nn.PixelShuffle(2)
128
- self.final = nn.Conv2d(64, 3, 3, padding=1)
129
-
130
- # Identity color preserving layer
131
- self.color_conv = nn.Conv2d(3, 3, 1)
132
- self._initialize_weights()
133
-
134
- def _initialize_weights(self):
135
- for m in self.modules():
136
- if isinstance(m, nn.Conv2d):
137
- nn.init.kaiming_normal_(m.weight, mode='fan_out')
138
- if m.bias is not None:
139
- nn.init.zeros_(m.bias)
140
-
141
- # Initialize color conv with identity matrix for color preservation
142
- with torch.no_grad():
143
- identity = torch.eye(3).reshape(3, 3, 1, 1)
144
- self.color_conv.weight.copy_(identity)
145
- if self.color_conv.bias is not None:
146
- self.color_conv.bias.zero_()
147
-
148
- def forward(self, x, scale_factor=2):
149
- x = self.initial(x)
150
- x = self.blocks(x)
151
-
152
- if scale_factor == 2:
153
- x = self.upconv1(x)
154
- x = self.pixel_shuffle(x)
155
- elif scale_factor == 3:
156
- x = self.upconv1(x)
157
- x = self.pixel_shuffle(x)
158
- x = F.interpolate(x, scale_factor=1.5, mode='bicubic', align_corners=False)
159
- elif scale_factor == 4:
160
- x = self.upconv1(x)
161
- x = self.pixel_shuffle(x)
162
- x = self.upconv2(x)
163
- x = self.pixel_shuffle(x)
164
-
165
- x = self.final(x)
166
- return self.color_conv(x)
167
-
168
- # ====================== PROCESSING PIPELINE ======================
169
- def process_tile(model, tile, scale_factor):
170
- # Preserve original for color reference
171
- original_tile = tile.copy()
172
-
173
- # Process with model
174
- tile_tensor = torch.tensor(tile/255.0, dtype=torch.float32).permute(2,0,1).unsqueeze(0)
175
- with torch.no_grad():
176
- output = model(tile_tensor, scale_factor)
177
-
178
- # Get raw output
179
- raw_output = output.squeeze().permute(1,2,0).clamp(0,1).numpy() * 255
180
-
181
- # Color correction
182
- color_corrected = preserve_original_colors(original_tile, raw_output.astype(np.uint8))
183
- return color_corrected
184
-
185
- def create_pyramid_weights(h, w):
186
- y = np.linspace(0, 1, h)
187
- x = np.linspace(0, 1, w)
188
- xx, yy = np.meshgrid(x, y)
189
- weights = np.minimum(np.minimum(xx, 1-xx), np.minimum(yy, 1-yy))
190
- return np.minimum(1.0, weights * 4)[:,:,np.newaxis]
191
-
192
- def process_image_with_tiling(model, image, scale_factor, tile_size=256, overlap=32):
193
- h, w, c = image.shape
194
- out_h, out_w = h*scale_factor, w*scale_factor
195
- output = np.zeros((out_h, out_w, c), np.float32)
196
- weight_map = np.zeros_like(output)
197
-
198
- effective_step = tile_size - 2*overlap
199
- for y in range(0, h, effective_step):
200
- for x in range(0, w, effective_step):
201
- y1, x1 = max(0, y-overlap), max(0, x-overlap)
202
- y2, x2 = min(h, y+tile_size+overlap), min(w, x+tile_size+overlap)
203
-
204
- tile = image[y1:y2, x1:x2]
205
- processed = process_tile(model, tile, scale_factor)
206
-
207
- out_y1, out_x1 = y1*scale_factor, x1*scale_factor
208
- out_y2, out_x2 = y2*scale_factor, x2*scale_factor
209
-
210
- # Create weights for this tile
211
- weights = create_pyramid_weights(processed.shape[0], processed.shape[1])
212
-
213
- output[out_y1:out_y2, out_x1:out_x2] += processed * weights
214
- weight_map[out_y1:out_y2, out_x1:out_x2] += weights
215
-
216
- valid_mask = weight_map > 0
217
- output[valid_mask] /= weight_map[valid_mask]
218
- return output.astype(np.uint8)
219
-
220
- # ====================== CORE SYSTEM COMPONENTS ======================
221
- class EnergyController:
222
- def __init__(self):
223
- self.available_threads = os.cpu_count()
224
-
225
- def adjust_processing(self, image_size):
226
- threads = max(1, min(self.available_threads, image_size//(1024**2)+1))
227
- torch.set_num_threads(threads)
228
- return threads
229
-
230
- class CPUUpscaler:
231
- def __init__(self):
232
- self.model = torch.quantization.quantize_dynamic(
233
- UltraEfficientSR(), {nn.Conv2d}, dtype=torch.qint8
234
- ).eval()
235
- self.energy_ctrl = EnergyController()
236
-
237
- def _calculate_optimal_tile_size(self, image):
238
- gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
239
- edge_density = cv2.Laplacian(gray, cv2.CV_64F).var()
240
- return 128 if edge_density > 500 else 256 if edge_density > 200 else 384
241
-
242
- def upscale(self, image, scale_factor=2):
243
- start_time = time.time()
244
-
245
- # Input handling
246
- if isinstance(image, Image.Image):
247
- image_np = np.array(image)
248
- else:
249
- image_np = image.copy()
250
-
251
- if image_np.shape[2] == 4:
252
- image_np = image_np[:,:,:3]
253
-
254
- # Force grayscale for B&W images
255
- is_grayscale = self._is_grayscale_image(image_np)
256
- if is_grayscale:
257
- gray = cv2.cvtColor(image_np, cv2.COLOR_RGB2GRAY)
258
- image_np = cv2.cvtColor(gray, cv2.COLOR_GRAY2RGB)
259
-
260
- # Processing setup
261
- threads_used = self.energy_ctrl.adjust_processing(image_np.size)
262
- tile_size = self._calculate_optimal_tile_size(image_np)
263
-
264
- # Save original for color reference
265
- original_img = image_np.copy()
266
-
267
- # Core processing
268
- if max(image_np.shape[:2]) > tile_size:
269
- output = process_image_with_tiling(self.model, image_np, scale_factor, tile_size)
270
- else:
271
- output = process_tile(self.model, image_np, scale_factor)
272
-
273
- # Final color correction
274
- output = preserve_original_colors(
275
- cv2.resize(original_img, (output.shape[1], output.shape[0])),
276
- output
277
- )
278
-
279
- # For B&W images, ensure true grayscale output
280
- if is_grayscale:
281
- gray = cv2.cvtColor(output, cv2.COLOR_RGB2GRAY)
282
- output = cv2.cvtColor(gray, cv2.COLOR_GRAY2RGB)
283
-
284
- # Final edge enhancement
285
- output = simple_edge_enhance(output)
286
-
287
- # Metrics
288
- metrics = {
289
- "processing_time": f"{time.time()-start_time:.2f}s",
290
- "input_resolution": f"{image_np.shape[1]}x{image_np.shape[0]}",
291
- "output_resolution": f"{output.shape[1]}x{output.shape[0]}",
292
- "threads_used": threads_used,
293
- "tile_size": tile_size,
294
- "color_preservation": "Active"
295
- }
296
-
297
- return Image.fromarray(output), metrics
298
-
299
- def _is_grayscale_image(self, img, threshold=5):
300
- """Detect if an image is effectively grayscale"""
301
- gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
302
- b, g, r = cv2.split(img)
303
-
304
- diff_r = np.abs(r.astype(np.float32) - gray.astype(np.float32))
305
- diff_g = np.abs(g.astype(np.float32) - gray.astype(np.float32))
306
- diff_b = np.abs(b.astype(np.float32) - gray.astype(np.float32))
307
-
308
- total_diff = (np.mean(diff_r) + np.mean(diff_g) + np.mean(diff_b))/3
309
- return total_diff < threshold
310
-
311
- # ====================== GRADIO INTERFACE ======================
312
- def create_interface():
313
- upscaler = CPUUpscaler()
314
-
315
- def process_image(input_img, scale_factor):
316
- scale_map = {"2x":2, "3x":3, "4x":4}
317
- output_img, metrics = upscaler.upscale(input_img, scale_map[scale_factor])
318
- return output_img, [input_img, output_img], metrics
319
-
320
- with gr.Blocks(theme=gr.themes.Soft()) as demo:
321
- gr.Markdown("# Professional Image Upscaler")
322
- with gr.Row():
323
- with gr.Column(scale=1):
324
- input_img = gr.Image(label="Input", type="pil")
325
- scale_factor = gr.Radio(["2x","3x","4x"], value="2x", label="Scale")
326
- upscale_btn = gr.Button("Upscale", variant="primary")
327
-
328
- with gr.Column(scale=2):
329
- output_img = gr.Image(label="Result", type="pil")
330
- comparison = gr.Gallery(columns=2, height="auto")
331
- metrics = gr.JSON(label="Metrics")
332
-
333
- upscale_btn.click(
334
- process_image,
335
- [input_img, scale_factor],
336
- [output_img, comparison, metrics]
337
- )
338
-
339
- return demo
340
-
341
- if __name__ == "__main__":
342
- create_interface().launch()
 
1
+ import gradio as gr
2
+ import torch
3
+ from diffusers import StableDiffusionUpscalePipeline
4
+
# Build the x4 upscaling pipeline once, at import time, so every request
# reuses the same loaded weights. float32 is requested explicitly.
model_id = "stabilityai/stable-diffusion-x4-upscaler"
pipe = StableDiffusionUpscalePipeline.from_pretrained(model_id, torch_dtype=torch.float32)

# Attention slicing: compute attention in chunks to save RAM.
pipe.enable_attention_slicing(slice_size="max")

# NOTE: sequential CPU offload is deliberately NOT enabled here. Per the
# original author's note it only helps when a GPU is present (it saves VRAM)
# and is not needed/supported on CPU-only machines.
17
+
def upscale_diffusion_cpu(
    input_img,
    prompt="high quality, detailed",
    *,
    num_inference_steps=20,
    guidance_scale=7.0,
    seed=42,
):
    """Upscale ``input_img`` with the module-level diffusion pipeline ``pipe``.

    Args:
        input_img: Image to upscale. Presumably a PIL image, since the Gradio
            input in this file is declared with ``type="pil"``; other objects
            are forwarded to the pipeline untouched. ``None`` means nothing
            was uploaded.
        prompt: Text prompt forwarded to the pipeline. ``None`` falls back to
            the default prompt text.
        num_inference_steps: Number of denoising steps. Kept low by default
            because CPU inference is slow.
        guidance_scale: Guidance scale forwarded to the pipeline.
        seed: Seed for the random generator handed to the pipeline, so that
            repeated calls with the same inputs use the same random stream.

    Returns:
        The first image of the pipeline output, or ``None`` when
        ``input_img`` is ``None``.
    """
    # Submitting the form without an image yields None; return an empty
    # result instead of failing inside the pipeline.
    if input_img is None:
        return None

    if prompt is None:
        prompt = "high quality, detailed"

    # NOTE(review): the upscaler is assumed to expect 3-channel input, so
    # PIL images in other modes (RGBA, L, P, ...) are normalised to RGB.
    # Objects without a ``mode`` attribute are passed through as-is.
    if getattr(input_img, "mode", "RGB") != "RGB":
        input_img = input_img.convert("RGB")

    # A private generator keeps the run reproducible without reseeding
    # torch's process-global RNG as a side effect.
    generator = torch.Generator(device="cpu").manual_seed(seed)

    result = pipe(
        prompt=prompt,
        image=input_img,
        num_inference_steps=num_inference_steps,
        guidance_scale=guidance_scale,
        generator=generator,
    )
    return result.images[0]
33
+
# Markdown shown under the title in the Gradio UI. It names only the
# optimisation that is actually enabled in this file (attention slicing);
# sequential offloading is commented out in the pipeline setup.
desc = """
### Memory Efficient Diffusion Upscaling (CPU)
This demo uses **Attention Slicing** to run a heavy Latent Diffusion model on CPU.
*Note: Diffusion on CPU is significantly slower than CNNs (EDSR) but generates hallucinations for missing details.*
"""

# Two inputs (image, prompt) map positionally onto upscale_diffusion_cpu's
# first two parameters; the single output is the upscaled image.
iface = gr.Interface(
    fn=upscale_diffusion_cpu,
    inputs=[
        gr.Image(type="pil", label="Low Res Input"),
        gr.Textbox(label="Prompt (Optional)", value="highly detailed, 4k, sharp"),
    ],
    outputs=gr.Image(type="pil", label="Diffusion Upscaled"),
    title="Memory Efficient Diffusion Upscaler",
    description=desc,
)

# Start the web server only when run as a script, so importing this module
# (e.g. for reuse of `iface`) does not launch it as a side effect.
if __name__ == "__main__":
    iface.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
requirements.txt CHANGED
@@ -1,8 +1,7 @@
1
- torch>=2.2.0
2
- torchvision>=0.17.0
3
- pillow>=10.1.0
4
- numpy>=1.26.0
5
- gradio>=4.12.0
6
- opencv-python-headless>=4.9.0
7
- huggingface_hub>=0.22.0
8
- scikit-image>=0.22.0
 
1
+ torch
2
+ diffusers
3
+ transformers
4
+ accelerate
5
+ scipy
6
+ pillow
7
+ gradio