Spaces:

jgitsolutions
/

AdvancedUpscalerCPU

Sleeping

App Files Files Community

jgitsolutions committed 24 days ago

Commit

16c0cde

verified ·

1 Parent(s): 0f8236e

Upload 2 files

Browse files

Files changed (2) hide show

app.py +51 -342
requirements.txt +7 -8

app.py CHANGED Viewed

@@ -1,342 +1,51 @@
-# app.py - Complete Color-Corrected Implementation
-import os
-import cv2
-import time
-import numpy as np
-import gradio as gr
-import torch
-import torch.nn as nn
-import torch.nn.functional as F
-from PIL import Image
-from functools import partial
-# ====================== COLOR PRESERVATION FUNCTIONS ======================
-def preserve_original_colors(original, processed):
-    """Transfer colors from original to processed image"""
-    # Convert to LAB color space
-    original_lab = cv2.cvtColor(original, cv2.COLOR_RGB2LAB)
-    processed_lab = cv2.cvtColor(processed, cv2.COLOR_RGB2LAB)
-    # Replace color information
-    processed_l, _, _ = cv2.split(processed_lab)
-    orig_l, orig_a, orig_b = cv2.split(original_lab)
-    # Use luminance from processed image but color from original
-    # Resize original color channels to match processed dimensions
-    h, w = processed_l.shape[:2]
-    resized_a = cv2.resize(orig_a, (w, h), interpolation=cv2.INTER_LINEAR)
-    resized_b = cv2.resize(orig_b, (w, h), interpolation=cv2.INTER_LINEAR)
-    # Create color corrected image
-    color_corrected = cv2.merge([processed_l, resized_a, resized_b])
-    return cv2.cvtColor(color_corrected, cv2.COLOR_LAB2RGB)
-def fix_color_cast(img):
-    """Remove color cast from image"""
-    # For grayscale/black&white images, force true grayscale
-    gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
-    gray_rgb = cv2.cvtColor(gray, cv2.COLOR_GRAY2RGB)
-    # Detect if image is likely grayscale
-    diff_r = np.abs(img[:,:,0].astype(np.float32) - gray.astype(np.float32))
-    diff_g = np.abs(img[:,:,1].astype(np.float32) - gray.astype(np.float32))
-    diff_b = np.abs(img[:,:,2].astype(np.float32) - gray.astype(np.float32))
-    total_diff = (np.mean(diff_r) + np.mean(diff_g) + np.mean(diff_b))/3
-    # If grayscale-like, force true grayscale
-    if total_diff < 10:  # Threshold for considering an image grayscale
-        return gray_rgb
-    # Otherwise just correct color balance
-    b, g, r = cv2.split(img)
-    r_avg, g_avg, b_avg = np.mean(r), np.mean(g), np.mean(b)
-    # Compute grayscale average
-    gray_avg = np.mean(gray)
-    # Adjust channels to balance
-    r = np.clip(r * (gray_avg / r_avg) if r_avg > 0 else r, 0, 255).astype(np.uint8)
-    g = np.clip(g * (gray_avg / g_avg) if g_avg > 0 else g, 0, 255).astype(np.uint8)
-    b = np.clip(b * (gray_avg / b_avg) if b_avg > 0 else b, 0, 255).astype(np.uint8)
-    return cv2.merge([b, g, r])
-def simple_edge_enhance(img):
-    """Enhance edges without color distortion"""
-    gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
-    edges = cv2.Canny(gray, 50, 150)
-    dilated = cv2.dilate(edges, np.ones((2,2), np.uint8))
-    # Create edge mask
-    edge_mask = dilated.astype(np.float32) / 255.0
-    # Sharpen image while preserving colors
-    blurred = cv2.GaussianBlur(img, (0, 0), 3)
-    sharpened = cv2.addWeighted(img, 1.5, blurred, -0.5, 0)
-    # Apply sharpening only to edges
-    edge_mask = cv2.cvtColor(edge_mask[:,:,np.newaxis], cv2.COLOR_GRAY2RGB)
-    enhanced = img * (1 - edge_mask) + sharpened * edge_mask
-    return enhanced.astype(np.uint8)
-# ====================== MODEL ARCHITECTURE ======================
-class SelfAttention(nn.Module):
-    def __init__(self, channels):
-        super().__init__()
-        self.query = nn.Conv2d(channels, channels, 1)
-        self.key = nn.Conv2d(channels, channels, 1)
-        self.value = nn.Conv2d(channels, channels, 1)
-        self.gamma = nn.Parameter(torch.zeros(1))
-    def forward(self, x):
-        batch, c, h, w = x.size()
-        q = self.query(x).view(batch, c, -1)
-        k = self.key(x).view(batch, c, -1).permute(0, 2, 1)
-        v = self.value(x).view(batch, c, -1)
-        attention = F.softmax(torch.bmm(q, k) / (c**0.5), dim=2)
-        out = torch.bmm(attention, v).view(batch, c, h, w)
-        return self.gamma * out + x
-class ResidualBlock(nn.Module):
-    def __init__(self, channels):
-        super().__init__()
-        self.conv1 = nn.Conv2d(channels, channels, 3, padding=1)
-        self.conv2 = nn.Conv2d(channels, channels, 3, padding=1)
-        self.relu = nn.ReLU(inplace=True)
-    def forward(self, x):
-        residual = x
-        x = self.relu(self.conv1(x))
-        x = self.conv2(x)
-        return self.relu(x + residual)
-class UltraEfficientSR(nn.Module):
-    def __init__(self):
-        super().__init__()
-        self.initial = nn.Conv2d(3, 64, 3, padding=1)
-        self.blocks = nn.Sequential(
-            ResidualBlock(64),
-            SelfAttention(64),
-            ResidualBlock(64)
-        )
-        self.upconv1 = nn.Conv2d(64, 256, 3, padding=1)
-        self.upconv2 = nn.Conv2d(64, 256, 3, padding=1)
-        self.pixel_shuffle = nn.PixelShuffle(2)
-        self.final = nn.Conv2d(64, 3, 3, padding=1)
-        # Identity color preserving layer
-        self.color_conv = nn.Conv2d(3, 3, 1)
-        self._initialize_weights()
-    def _initialize_weights(self):
-        for m in self.modules():
-            if isinstance(m, nn.Conv2d):
-                nn.init.kaiming_normal_(m.weight, mode='fan_out')
-                if m.bias is not None:
-                    nn.init.zeros_(m.bias)
-        # Initialize color conv with identity matrix for color preservation
-        with torch.no_grad():
-            identity = torch.eye(3).reshape(3, 3, 1, 1)
-            self.color_conv.weight.copy_(identity)
-            if self.color_conv.bias is not None:
-                self.color_conv.bias.zero_()
-    def forward(self, x, scale_factor=2):
-        x = self.initial(x)
-        x = self.blocks(x)
-        if scale_factor == 2:
-            x = self.upconv1(x)
-            x = self.pixel_shuffle(x)
-        elif scale_factor == 3:
-            x = self.upconv1(x)
-            x = self.pixel_shuffle(x)
-            x = F.interpolate(x, scale_factor=1.5, mode='bicubic', align_corners=False)
-        elif scale_factor == 4:
-            x = self.upconv1(x)
-            x = self.pixel_shuffle(x)
-            x = self.upconv2(x)
-            x = self.pixel_shuffle(x)
-        x = self.final(x)
-        return self.color_conv(x)
-# ====================== PROCESSING PIPELINE ======================
-def process_tile(model, tile, scale_factor):
-    # Preserve original for color reference
-    original_tile = tile.copy()
-    # Process with model
-    tile_tensor = torch.tensor(tile/255.0, dtype=torch.float32).permute(2,0,1).unsqueeze(0)
-    with torch.no_grad():
-        output = model(tile_tensor, scale_factor)
-    # Get raw output
-    raw_output = output.squeeze().permute(1,2,0).clamp(0,1).numpy() * 255
-    # Color correction
-    color_corrected = preserve_original_colors(original_tile, raw_output.astype(np.uint8))
-    return color_corrected
-def create_pyramid_weights(h, w):
-    y = np.linspace(0, 1, h)
-    x = np.linspace(0, 1, w)
-    xx, yy = np.meshgrid(x, y)
-    weights = np.minimum(np.minimum(xx, 1-xx), np.minimum(yy, 1-yy))
-    return np.minimum(1.0, weights * 4)[:,:,np.newaxis]
-def process_image_with_tiling(model, image, scale_factor, tile_size=256, overlap=32):
-    h, w, c = image.shape
-    out_h, out_w = h*scale_factor, w*scale_factor
-    output = np.zeros((out_h, out_w, c), np.float32)
-    weight_map = np.zeros_like(output)
-    effective_step = tile_size - 2*overlap
-    for y in range(0, h, effective_step):
-        for x in range(0, w, effective_step):
-            y1, x1 = max(0, y-overlap), max(0, x-overlap)
-            y2, x2 = min(h, y+tile_size+overlap), min(w, x+tile_size+overlap)
-            tile = image[y1:y2, x1:x2]
-            processed = process_tile(model, tile, scale_factor)
-            out_y1, out_x1 = y1*scale_factor, x1*scale_factor
-            out_y2, out_x2 = y2*scale_factor, x2*scale_factor
-            # Create weights for this tile
-            weights = create_pyramid_weights(processed.shape[0], processed.shape[1])
-            output[out_y1:out_y2, out_x1:out_x2] += processed * weights
-            weight_map[out_y1:out_y2, out_x1:out_x2] += weights
-    valid_mask = weight_map > 0
-    output[valid_mask] /= weight_map[valid_mask]
-    return output.astype(np.uint8)
-# ====================== CORE SYSTEM COMPONENTS ======================
-class EnergyController:
-    def __init__(self):
-        self.available_threads = os.cpu_count()
-    def adjust_processing(self, image_size):
-        threads = max(1, min(self.available_threads, image_size//(1024**2)+1))
-        torch.set_num_threads(threads)
-        return threads
-class CPUUpscaler:
-    def __init__(self):
-        self.model = torch.quantization.quantize_dynamic(
-            UltraEfficientSR(), {nn.Conv2d}, dtype=torch.qint8
-        ).eval()
-        self.energy_ctrl = EnergyController()
-    def _calculate_optimal_tile_size(self, image):
-        gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
-        edge_density = cv2.Laplacian(gray, cv2.CV_64F).var()
-        return 128 if edge_density > 500 else 256 if edge_density > 200 else 384
-    def upscale(self, image, scale_factor=2):
-        start_time = time.time()
-        # Input handling
-        if isinstance(image, Image.Image):
-            image_np = np.array(image)
-        else:
-            image_np = image.copy()
-        if image_np.shape[2] == 4:
-            image_np = image_np[:,:,:3]
-        # Force grayscale for B&W images
-        is_grayscale = self._is_grayscale_image(image_np)
-        if is_grayscale:
-            gray = cv2.cvtColor(image_np, cv2.COLOR_RGB2GRAY)
-            image_np = cv2.cvtColor(gray, cv2.COLOR_GRAY2RGB)
-        # Processing setup
-        threads_used = self.energy_ctrl.adjust_processing(image_np.size)
-        tile_size = self._calculate_optimal_tile_size(image_np)
-        # Save original for color reference
-        original_img = image_np.copy()
-        # Core processing
-        if max(image_np.shape[:2]) > tile_size:
-            output = process_image_with_tiling(self.model, image_np, scale_factor, tile_size)
-        else:
-            output = process_tile(self.model, image_np, scale_factor)
-        # Final color correction
-        output = preserve_original_colors(
-            cv2.resize(original_img, (output.shape[1], output.shape[0])),
-            output
-        )
-        # For B&W images, ensure true grayscale output
-        if is_grayscale:
-            gray = cv2.cvtColor(output, cv2.COLOR_RGB2GRAY)
-            output = cv2.cvtColor(gray, cv2.COLOR_GRAY2RGB)
-        # Final edge enhancement
-        output = simple_edge_enhance(output)
-        # Metrics
-        metrics = {
-            "processing_time": f"{time.time()-start_time:.2f}s",
-            "input_resolution": f"{image_np.shape[1]}x{image_np.shape[0]}",
-            "output_resolution": f"{output.shape[1]}x{output.shape[0]}",
-            "threads_used": threads_used,
-            "tile_size": tile_size,
-            "color_preservation": "Active"
-        }
-        return Image.fromarray(output), metrics
-    def _is_grayscale_image(self, img, threshold=5):
-        """Detect if an image is effectively grayscale"""
-        gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
-        b, g, r = cv2.split(img)
-        diff_r = np.abs(r.astype(np.float32) - gray.astype(np.float32))
-        diff_g = np.abs(g.astype(np.float32) - gray.astype(np.float32))
-        diff_b = np.abs(b.astype(np.float32) - gray.astype(np.float32))
-        total_diff = (np.mean(diff_r) + np.mean(diff_g) + np.mean(diff_b))/3
-        return total_diff < threshold
-# ====================== GRADIO INTERFACE ======================
-def create_interface():
-    upscaler = CPUUpscaler()
-    def process_image(input_img, scale_factor):
-        scale_map = {"2x":2, "3x":3, "4x":4}
-        output_img, metrics = upscaler.upscale(input_img, scale_map[scale_factor])
-        return output_img, [input_img, output_img], metrics
-    with gr.Blocks(theme=gr.themes.Soft()) as demo:
-        gr.Markdown("# Professional Image Upscaler")
-        with gr.Row():
-            with gr.Column(scale=1):
-                input_img = gr.Image(label="Input", type="pil")
-                scale_factor = gr.Radio(["2x","3x","4x"], value="2x", label="Scale")
-                upscale_btn = gr.Button("Upscale", variant="primary")
-            with gr.Column(scale=2):
-                output_img = gr.Image(label="Result", type="pil")
-                comparison = gr.Gallery(columns=2, height="auto")
-                metrics = gr.JSON(label="Metrics")
-        upscale_btn.click(
-            process_image,
-            [input_img, scale_factor],
-            [output_img, comparison, metrics]
-        )
-    return demo
-if __name__ == "__main__":
-    create_interface().launch()

+import gradio as gr
+import torch
+from diffusers import StableDiffusionUpscalePipeline
+# Load pipeline efficiently for CPU
+model_id = "stabilityai/stable-diffusion-x4-upscaler"
+pipe = StableDiffusionUpscalePipeline.from_pretrained(
+    model_id,
+    torch_dtype=torch.float32
+)
+# 1. SLICING: Cuts attention computation into chunks to save RAM
+pipe.enable_attention_slicing("max")
+# 2. OFFLOADING (intentionally disabled): sequential CPU offload moves unused model parts to system RAM to save GPU VRAM.
+# pipe.enable_sequential_cpu_offload()  # Only works with a GPU; on CPU-only machines it is neither needed nor supported.
+def upscale_diffusion_cpu(input_img, prompt="high quality, detailed"):
+    # No resizing is done here (add it only if the pipeline requires it):
+    # the x4 upscaler handles low-res inputs naturally.
+    # CPU inference is slow, so we limit the number of steps.
+    generator = torch.manual_seed(42)
+    output = pipe(
+        prompt=prompt,
+        image=input_img,
+        num_inference_steps=20, # Lower steps for CPU speed (usually 50+)
+        guidance_scale=7.0,
+        generator=generator
+    ).images[0]
+    return output
+desc = """
+### Memory Efficient Diffusion Upscaling (CPU)
+This demo uses **Attention Slicing** and **Sequential Offloading** to run a heavy Latent Diffusion model on CPU.
+*Note: Diffusion on CPU is significantly slower than CNNs (EDSR) but generates hallucinations for missing details.*
+"""
+iface = gr.Interface(
+    fn=upscale_diffusion_cpu,
+    inputs=[
+        gr.Image(type="pil", label="Low Res Input"),
+        gr.Textbox(label="Prompt (Optional)", value="highly detailed, 4k, sharp")
+    ],
+    outputs=gr.Image(type="pil", label="Diffusion Upscaled"),
+    title="Memory Efficient Diffusion Upscaler",
+    description=desc
+)
+iface.launch()

requirements.txt CHANGED Viewed

@@ -1,8 +1,7 @@
-torch>=2.2.0
-torchvision>=0.17.0
-pillow>=10.1.0
-numpy>=1.26.0
-gradio>=4.12.0
-opencv-python-headless>=4.9.0
-huggingface_hub>=0.22.0
-scikit-image>=0.22.0

+torch
+diffusers
+transformers
+accelerate
+scipy
+pillow
+gradio