Spaces:

aptol
/

genshin

Running on Zero

App Files Files Community

aptol committed on Aug 18

Commit

bb6302e

verified ·

1 Parent(s): c082a2e

Update app.py

Browse files

Files changed (1) hide show

app.py +113 -32

app.py CHANGED Viewed

@@ -267,13 +267,35 @@ def _pick_glb(res: Any) -> Optional[str]:
 # STEP1: BG remove / WEAPON remove / T-POSE / Redraw
 # ---------------------------------
 def _remove_bg(img: Image.Image) -> Image.Image:
-    if PKG["rembg_remove"] is None: return img
     try:
-        arr = img.convert("RGBA").tobytes()
-        out = PKG["rembg_remove"](arr)  # bytes->bytes
-        return Image.open(io.BytesIO(out)).convert("RGBA")
     except Exception:
-        return img
 # ---- Weapon remove (DINO -> SAM -> LaMa/OpenCV)
 _DINO_MODEL = None
@@ -520,9 +542,55 @@ def step1_cpu(img, keep_rembg, do_weaponless, weapon_terms):
             base = _weaponless_pipeline(base, weapon_terms, logs)
     except Exception as e:
         logs.append(f"무기 제거 실패: {e}")
     out_path = _save_png(base, OUT / "step1" / "input_preprocessed.png")
-    return [(out_path, "preprocessed")], out_path, "\n".join(logs)
 @spaces.GPU(duration=600)  # ← ZeroGPU 환경: 여기서만 CUDA/모델 로딩 허용
@@ -538,6 +606,18 @@ def step1_gpu_refine(
     redraw_strength = max(0.25, min(0.5,  float(redraw_strength)))
     redraw_steps    = int(max(12,  min(28,  int(redraw_steps))))
     redraw_guidance = max(5.0,  min(9.0,  float(redraw_guidance)))
     """GPU 단계: ControlNet(OpenPose)로 T-포즈 강제 + img2img 리드로우"""
     logs = []
@@ -545,58 +625,59 @@ def step1_gpu_refine(
         raise gr.Error("STEP1 이미지가 없습니다. 먼저 STEP1(CPU)을 실행하세요.")
     img = Image.open(s1_path).convert("RGBA")
     # ---- T-포즈 (ControlNet/OpenPose)
     if enforce_tpose:
         try:
             from diffusers import ControlNetModel, StableDiffusionControlNetImg2ImgPipeline
             import torch
             dev = "cuda" if torch.cuda.is_available() else "cpu"
-            cn = ControlNetModel.from_pretrained(
                 "lllyasviel/control_v11p_sd15_openpose",
-                torch_dtype=(torch.float16 if dev == "cuda" else torch.float32)
             )
             pipe = StableDiffusionControlNetImg2ImgPipeline.from_pretrained(
                 "runwayml/stable-diffusion-v1-5",
-                controlnet=cn,
-                torch_dtype=(torch.float16 if dev == "cuda" else torch.float32)
-            )
-            if dev == "cuda":
-                pipe.to("cuda")
-            pose_canvas = _draw_tpose_openpose_canvas(size=max(img.size))
             img = pipe(
                 prompt="T-pose, full body, clean anime lines",
-                image=img.convert("RGB"),
                 control_image=pose_canvas,
                 strength=float(tpose_strength),
                 guidance_scale=float(tpose_guidance),
-                num_inference_steps=int(tpose_steps)
             ).images[0].convert("RGBA")
-            logs.append("ControlNet(OpenPose) T-포즈 적용")
         except Exception as e:
-            logs.append(f"T-포즈 ControlNet 실패: {e}")
     # ---- img2img 리드로우 (옵션)
     if do_redraw_flag:
         try:
-            from diffusers import StableDiffusionImg2ImgPipeline
-            import torch
-            dev = "cuda" if torch.cuda.is_available() else "cpu"
-            pipe = StableDiffusionImg2ImgPipeline.from_pretrained(
-                "runwayml/stable-diffusion-v1-5",
-                torch_dtype=(torch.float16 if dev == "cuda" else torch.float32)
-            )
-            if dev == "cuda":
-                pipe.to("cuda")
             img = pipe(
                 prompt="clean anime illustration, sharp lines, simple solid background",
-                image=img.convert("RGB"),
                 strength=float(redraw_strength),
                 guidance_scale=float(redraw_guidance),
-                num_inference_steps=int(redraw_steps)
             ).images[0].convert("RGBA")
-            logs.append("img2img 리드로우 적용")
         except Exception as e:
-            logs.append(f"img2img 리드로우 실패: {e}")
     out_path = _save_png(img, OUT / "step1" / "input_preprocessed.png")
     return [(out_path, "refined")], out_path, "\n".join(logs)

 # STEP1: BG remove / WEAPON remove / T-POSE / Redraw
 # ---------------------------------
 def _remove_bg(img: Image.Image) -> Image.Image:
     """Strip the background with rembg, falling back to the input on failure.

     The image is round-tripped through PNG bytes because ``rembg.remove`` is
     called here in its bytes-in / bytes-out form.

     Args:
         img: Source image in any mode that can be converted to RGBA.

     Returns:
         The cut-out as an RGBA image. If rembg cannot be imported, raises
         while processing, or yields a fully transparent result, the input
         converted to RGBA is returned instead.
     """
     import logging

     log = logging.getLogger(__name__)
     # Converted once: used both as the rembg input and as the fallback result.
     fallback = img.convert("RGBA")
     try:
         from rembg import remove

         png_in = io.BytesIO()
         fallback.save(png_in, format="PNG")
         png_out = remove(png_in.getvalue())  # bytes in -> bytes out
         cutout = Image.open(io.BytesIO(png_out)).convert("RGBA")
     except Exception:
         # Deliberately best-effort: background removal is optional, so keep
         # going with the original image, but leave a trace of what went wrong.
         log.warning("rembg background removal failed; using original image", exc_info=True)
         return fallback

     # getbbox() is None when every alpha value is 0, i.e. rembg erased everything.
     if cutout.getchannel("A").getbbox() is None:
         log.warning("rembg returned a fully transparent image; using original image")
         return fallback
     return cutout
def _to_preview(img: Image.Image, bg=(40, 40, 40)) -> Image.Image:
    """Flatten an image onto an opaque background for gallery display.

    Only the returned copy is flattened; ``img`` itself is left untouched, so
    the caller can still save the original with its alpha channel.

    Args:
        img: Image to preview.
        bg: RGB colour shown behind transparent pixels.

    Returns:
        A new RGB image with the same size as ``img``.
    """
    # Modes that carry transparency besides RGBA: grey+alpha, palette+alpha,
    # and palette images with a transparency entry. Converting those straight
    # to RGB would drop the alpha instead of compositing it onto ``bg``.
    carries_alpha = img.mode in ("RGBA", "LA", "PA") or (
        img.mode == "P" and "transparency" in img.info
    )
    if not carries_alpha:
        return img.convert("RGB")

    rgba = img if img.mode == "RGBA" else img.convert("RGBA")
    canvas = Image.new("RGB", rgba.size, bg)
    canvas.paste(rgba, mask=rgba.getchannel("A"))
    return canvas
 # ---- Weapon remove (DINO -> SAM -> LaMa/OpenCV)
 _DINO_MODEL = None
             base = _weaponless_pipeline(base, weapon_terms, logs)
     except Exception as e:
         logs.append(f"무기 제거 실패: {e}")
+    # 전처리 끝난 후
     out_path = _save_png(base, OUT / "step1" / "input_preprocessed.png")
+    # ✅ 갤러리는 합성된 미리보기로
+    preview = _to_preview(base)
+    # 기존 return 교체
+    return [preview], out_path, "\n".join(logs)
def _resize_to_multiple(img: Image.Image, multiple: int = 8, max_side: int = 768) -> Image.Image:
    """Shrink an image to fit ``max_side`` and snap both sides to ``multiple``.

    The image is never upscaled by the ``max_side`` step. Each side is then
    rounded down to a multiple of ``multiple`` independently (with a floor of
    one ``multiple``), so the aspect ratio is kept only approximately.

    Args:
        img: Image to resize.
        multiple: Both output sides are multiples of this value; must be >= 1.
        max_side: Upper bound applied to the longer side before snapping.

    Returns:
        ``img`` itself when no size change is needed, otherwise a
        bicubic-resampled copy.

    Raises:
        ValueError: If ``multiple`` is below 1 or the image has zero size.
    """
    if multiple < 1:
        raise ValueError(f"multiple must be >= 1, got {multiple}")

    src_w, src_h = img.size
    longest = max(src_w, src_h)
    if longest < 1:
        # Would otherwise surface as a bare ZeroDivisionError below.
        raise ValueError("cannot resize an image with zero size")

    # 1) Cap the longer side at max_side (scale never exceeds 1.0).
    scale = min(1.0, float(max_side) / float(longest))
    new_w = int(src_w * scale)
    new_h = int(src_h * scale)

    # 2) Round each side down to the grid, but never below one grid cell.
    new_w = max(multiple, (new_w // multiple) * multiple)
    new_h = max(multiple, (new_h // multiple) * multiple)

    if (new_w, new_h) == (src_w, src_h):
        return img
    return img.resize((new_w, new_h), Image.BICUBIC)
def _make_tpose_canvas_like(img: Image.Image) -> Image.Image:
    """Build a black canvas the size of ``img`` holding a white T-pose figure.

    The stick figure is drawn on a square whose side equals the shorter side
    of ``img``; that square is then pasted centred onto the full-size canvas.

    Args:
        img: Image whose dimensions the canvas should match.

    Returns:
        A new RGB image with the same size as ``img``.
    """
    ink = "white"
    stroke = 10
    joint_r = 8

    width, height = img.size
    side = min(width, height)

    figure = Image.new("RGB", (side, side), "black")
    pen = ImageDraw.Draw(figure)

    # Key coordinates, all derived from the square's side length.
    mid_x = side // 2
    mid_y = int(side * 0.58)
    arm_len = int(side * 0.36)
    leg_len = int(side * 0.36)
    head_r = int(side * 0.06)
    shoulder_y = mid_y - int(side * 0.22)
    hip_y = mid_y + int(side * 0.04)
    head_y = shoulder_y - int(side * 0.18)
    foot_dx = int(leg_len * 0.65)
    foot_y = mid_y + leg_len

    # Spine
    pen.line([(mid_x, mid_y - int(side * 0.28)), (mid_x, hip_y)], fill=ink, width=stroke)

    # Arms: one horizontal stroke through the shoulders
    pen.line([(mid_x - arm_len, shoulder_y), (mid_x + arm_len, shoulder_y)], fill=ink, width=stroke)

    # Legs: left, then right, both starting at the hip
    for direction in (-1, 1):
        pen.line(
            [(mid_x, hip_y), (mid_x + direction * foot_dx, foot_y)],
            fill=ink,
            width=stroke,
        )

    # Head outline
    pen.ellipse(
        [(mid_x - head_r, head_y - head_r), (mid_x + head_r, head_y + head_r)],
        outline=ink,
        width=stroke,
    )

    # Joint markers
    joints = (
        (mid_x, shoulder_y),
        (mid_x - arm_len, shoulder_y),
        (mid_x + arm_len, shoulder_y),
        (mid_x, mid_y),
        (mid_x, hip_y),
    )
    for jx, jy in joints:
        pen.ellipse([(jx - joint_r, jy - joint_r), (jx + joint_r, jy + joint_r)], fill=ink)

    # Centre the square figure on a canvas matching the input resolution.
    canvas = Image.new("RGB", (width, height), "black")
    canvas.paste(figure, ((width - side) // 2, (height - side) // 2))
    return canvas
 @spaces.GPU(duration=600)  # ← ZeroGPU 환경: 여기서만 CUDA/모델 로딩 허용
     redraw_strength = max(0.25, min(0.5,  float(redraw_strength)))
     redraw_steps    = int(max(12,  min(28,  int(redraw_steps))))
     redraw_guidance = max(5.0,  min(9.0,  float(redraw_guidance)))
+    # 입력 이미지와 포즈 캔버스를 같은 해상도(8의 배수)로 맞추기
+    img_rgb = img.convert("RGB")
+    img_rgb = _resize_to_multiple(img_rgb, multiple=8, max_side=768)
+    pose_canvas = _make_tpose_canvas_like(img_rgb)   # 입력과 동일 해상도
+    # (선택) 시드 고정 원하면:
+    # generator = None
+    # try:
+    #     import torch
+    #     generator = torch.Generator(device="cuda" if torch.cuda.is_available() else "cpu").manual_seed(0)
+    # except Exception:
+    #     pass
     """GPU 단계: ControlNet(OpenPose)로 T-포즈 강제 + img2img 리드로우"""
     logs = []
         raise gr.Error("STEP1 이미지가 없습니다. 먼저 STEP1(CPU)을 실행하세요.")
     img = Image.open(s1_path).convert("RGBA")
+    # ---- T-포즈 (ControlNet/OpenPose)
     # ---- T-포즈 (ControlNet/OpenPose)
     if enforce_tpose:
         try:
             from diffusers import ControlNetModel, StableDiffusionControlNetImg2ImgPipeline
             import torch
             dev = "cuda" if torch.cuda.is_available() else "cpu"
+            controlnet = ControlNetModel.from_pretrained(
                 "lllyasviel/control_v11p_sd15_openpose",
+                torch_dtype=torch.float16 if dev == "cuda" else torch.float32
             )
             pipe = StableDiffusionControlNetImg2ImgPipeline.from_pretrained(
                 "runwayml/stable-diffusion-v1-5",
+                controlnet=controlnet,
+                torch_dtype=torch.float16 if dev == "cuda" else torch.float32
+            ).to(dev)
+            # ✅ 여기서 리사이즈 & 포즈 캔버스 생성
+            img_rgb = _resize_to_multiple(img.convert("RGB"), multiple=8, max_side=768)
+            pose_canvas = _make_tpose_canvas_like(img_rgb)
             img = pipe(
                 prompt="T-pose, full body, clean anime lines",
+                image=img_rgb,
                 control_image=pose_canvas,
                 strength=float(tpose_strength),
                 guidance_scale=float(tpose_guidance),
+                num_inference_steps=int(tpose_steps),
+                # generator=generator,
             ).images[0].convert("RGBA")
         except Exception as e:
+            logs.append(f"T-포즈 실패: {e}")
     # ---- img2img 리드로우 (옵션)
+    # ---- 리드로우
     if do_redraw_flag:
         try:
+            img_for_redraw = _resize_to_multiple(img.convert("RGB"), multiple=8, max_side=768)
             img = pipe(
                 prompt="clean anime illustration, sharp lines, simple solid background",
+                image=img_for_redraw,
                 strength=float(redraw_strength),
                 guidance_scale=float(redraw_guidance),
+                num_inference_steps=int(redraw_steps),
+                # generator=generator,
             ).images[0].convert("RGBA")
         except Exception as e:
+            logs.append(f"리드로우 실패: {e}")
     out_path = _save_png(img, OUT / "step1" / "input_preprocessed.png")
     return [(out_path, "refined")], out_path, "\n".join(logs)