aptol committed (verified)
Commit 70e511b · 1 Parent(s): b69bbf2

Update app.py

Files changed (1):
  1. app.py +144 -42
app.py CHANGED
@@ -691,51 +691,153 @@ def step1_gpu_refine(
 
     # ---- T-pose (ControlNet/OpenPose)
     if enforce_tpose:
-        from diffusers import ControlNetModel, StableDiffusionControlNetImg2ImgPipeline
-        controlnet = ControlNetModel.from_pretrained(
-            "lllyasviel/control_v11p_sd15_openpose",
-            torch_dtype=dtype
-        )
-        pipe_pose = StableDiffusionControlNetImg2ImgPipeline.from_pretrained(
-            "runwayml/stable-diffusion-v1-5",
-            controlnet=controlnet,
-            torch_dtype=dtype,
-            safety_checker=None,
-            feature_extractor=None,
-        )
-        pipe_pose = _disable_safety(pipe_pose)
-
-        try: pipe_pose.scheduler = EulerAncestralDiscreteScheduler.from_config(pipe_pose.scheduler.config)
-        except: pass
-        if dev == "cuda": pipe_pose.to("cuda")
-
-        img_rgb = _resize_to_multiple(img.convert("RGB"), multiple=8, max_side=512)
-        pose_orig  = _openpose_canvas_from_image(img_rgb)
-        pose_tpose = _draw_tpose_openpose_canvas(size=min(img_rgb.size))  # existing T-pose guide
-        pose_canvas= _blend_pose_canvases(pose_orig, pose_tpose, alpha=0.4)
-
-        out = pipe_pose(
-            prompt="T-pose tendency, full body, same outfit and colors, clean anime lines, plain light background",
-            negative_prompt="extra limbs, deformed, melted face, distorted body, watermark, text, noisy",
-            image=img_rgb,
-            control_image=pose_canvas,
-            strength=float(tpose_strength),       # (clamped)
-            guidance_scale=float(tpose_guidance),
-            num_inference_steps=int(tpose_steps),
-            controlnet_conditioning_scale=0.30,   # ★ lowered
-            control_guidance_start=[0.0],
-            control_guidance_end=[0.50],          # ★ only up to the midpoint
-            guess_mode=True                       # ★ avoid over-controlling the background/fine regions
-        ).images[0]
-
-        img = out.convert("RGBA")
-        if _mean_brightness(img) < 12:
-            logs.append("T-pose result too dark → rolling back to original")
-            img = Image.open(s1_path).convert("RGBA")
-        else:
-            (OUT/"step1"/"dbg_pose_orig.png").write_bytes(pose_orig.tobytes()) if False else None
-            img.save(OUT/"step1"/"dbg_03_after_tpose.png")
-            logs.append("Applied ControlNet(OpenPose) T-pose (blended)")
+        try:
+            from diffusers import (
+                ControlNetModel,
+                StableDiffusionControlNetImg2ImgPipeline,
+                DPMSolverMultistepScheduler
+            )
+            import torch, math
+
+            dev   = "cuda" if torch.cuda.is_available() else "cpu"
+            dtype = torch.float16 if dev == "cuda" else torch.float32
+
+            # 1) Load the ControlNet
+            controlnet = ControlNetModel.from_pretrained(
+                "lllyasviel/control_v11p_sd15_openpose",
+                torch_dtype=dtype
+            )
+            pipe_pose = StableDiffusionControlNetImg2ImgPipeline.from_pretrained(
+                "runwayml/stable-diffusion-v1-5",
+                controlnet=controlnet,
+                torch_dtype=dtype,
+                safety_checker=None,
+                feature_extractor=None,
+            )
+            # Disable the safety checker entirely (our own utility)
+            pipe_pose = _disable_safety(pipe_pose)
+
+            # More stable Karras DPM-Solver
+            try:
+                pipe_pose.scheduler = DPMSolverMultistepScheduler.from_config(
+                    pipe_pose.scheduler.config,
+                    use_karras_sigmas=True
+                )
+            except Exception:
+                pass
+
+            if dev == "cuda": pipe_pose.to("cuda")
+            try:
+                pipe_pose.enable_vae_slicing()
+            except Exception:
+                pass
+
+            # 2) Prepare input/pose (unified resolution, multiples of 8)
+            base_rgb = _resize_to_multiple(img.convert("RGB"), multiple=8, max_side=512)
+
+            # Extract the original pose and blend it weakly (0.2) with the T-pose
+            pose_orig  = _openpose_canvas_from_image(base_rgb)   # original pose (skeleton)
+            pose_t     = _make_tpose_canvas_like(base_rgb)       # our hand-drawn T-pose canvas
+            pose_canvas= _blend_pose_canvases(pose_orig, pose_t, alpha=0.20).resize(base_rgb.size)
+
+            # Lines that are too thin make convergence wobble, so reinforce them
+            try:
+                from PIL import ImageDraw
+                pc   = pose_canvas.copy()
+                draw = ImageDraw.Draw(pc)
+                # Reinforce the border (add a white frame)
+                w,h = pc.size
+                draw.rectangle([2,2,w-3,h-3], outline=(255,255,255), width=2)
+                pose_canvas = pc
+            except Exception:
+                pass
+
+            # 3) Prompts (bright, simple background + NSFW-blocking terms)
+            POS = (
+                "T-pose tendency, full body, same outfit and colors, clean anime lines, "
+                "consistent scale, white studio background, bright, high-key lighting"
+            )
+            NEG = (
+                "black background, low-key lighting, extra limbs, extra fingers, deformed hands, "
+                "melted face, distorted body, nsfw, cleavage, underwear, bikini, watermark, text, noisy"
+            )
+
+            # 4) Very low ControlNet influence + a short application window
+            #    cond_scale schedule: taper slightly from 0.22 early to 0.18 late
+            #    (reference helper; the two-call split below approximates it)
+            def cond_scale_by_step(step, total):
+                start, end = 0.22, 0.18
+                t = step / max(1, total-1)
+                return end + (start - end) * (1.0 - t)  # linear decay
+
+            steps    = int(max(14, min(28, int(tpose_steps))))
+            strength = float(max(0.45, min(0.65, float(tpose_strength))))
+            guidance = float(max(7.0, min(9.5, float(tpose_guidance))))
+
+            # diffusers does not directly support a per-step cond_scale schedule, so
+            # emulate it with two calls: strong over [0~0.2], weak over [0.2~0.35]
+            def run_pose(_img_rgb, scale, start, end, n_steps):
+                return pipe_pose(
+                    prompt=POS,
+                    negative_prompt=NEG,
+                    image=_img_rgb,
+                    control_image=pose_canvas,
+                    strength=strength,
+                    guidance_scale=guidance,
+                    num_inference_steps=n_steps,
+                    controlnet_conditioning_scale=scale,
+                    control_guidance_start=[start],
+                    control_guidance_end=[end],
+                    guess_mode=True
+                ).images[0].convert("RGBA")
+
+            # Pass 1: early guidance (0.05~0.20, scale 0.22)
+            out_a = run_pose(base_rgb, scale=0.22, start=0.05, end=0.20, n_steps=math.ceil(steps*0.55))
+            # Pass 2: mid-range finish (0.20~0.35, scale 0.18); the input is the pass-1 result
+            inter_rgb = _resize_to_multiple(out_a.convert("RGB"), multiple=8, max_side=512)
+            out_b = run_pose(inter_rgb, scale=0.18, start=0.20, end=0.35, n_steps=steps - math.ceil(steps*0.55))
+
+            out = out_b
+
+            # 5) Face protection (restore only the face from the original): simple brightness/color-based box estimate
+            try:
+                # Estimate the face box without MediaPipe (roughly the top-center 30~35%)
+                W,H = out.size
+                cx, cy = W//2, int(H*0.30)
+                bw, bh = int(W*0.36), int(H*0.28)
+                x1, y1 = max(0, cx-bw//2), max(0, cy-bh//2)
+                x2, y2 = min(W, cx+bw//2), min(H, cy+bh//2)
+
+                face_new = out.crop((x1,y1,x2,y2))
+                face_old = Image.open(s1_path).convert("RGBA").resize((W,H), Image.LANCZOS).crop((x1,y1,x2,y2))
+
+                # Overlay naturally with a soft mask
+                import numpy as np
+                m = Image.new("L", (x2-x1, y2-y1), 0)
+                from PIL import ImageFilter
+                # Elliptical mask
+                mm = Image.new("L", m.size, 0)
+                draw = ImageDraw.Draw(mm)
+                draw.ellipse([4,4,mm.size[0]-5,mm.size[1]-5], fill=255)
+                mm = mm.filter(ImageFilter.GaussianBlur(6))
+                face_mix = Image.composite(face_old, face_new, mm)
+                out.paste(face_mix, (x1,y1), mm)
+            except Exception:
+                pass
+
+            # 6) If the result is too dark, lift brightness; roll back to the original on failure
+            if _mean_brightness(out) < 16:
+                out = _lift_brightness(out, gain=1.20, gamma=0.88)
+            if _mean_brightness(out) < 12:
+                logs.append("T-pose (SafeMode) result too dark → keeping original")
+            else:
+                img = out
+                img.save(OUT/"step1"/"dbg_03_after_tpose.png")
+                pose_canvas.save(OUT/"step1"/"dbg_pose_safemode.png")
+                logs.append("Applied T-pose (SafeMode): 80% original + 20% T-pose, cond_scale↓, Karras DPM, face protection")
+
+        except Exception as e:
+            logs.append(f"T-pose ControlNet failed (SafeMode): {e}")
+
+
841
 
842
 
843
  # ---- (옵션) 리드로우(img2img)
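
The SafeMode branch relies on several small helpers (_mean_brightness, _blend_pose_canvases, _lift_brightness) that are defined elsewhere in app.py and do not appear in this hunk. The following is a minimal sketch of what such helpers could look like, assuming plain PIL images throughout; these are hypothetical stand-ins, not the repo's actual implementations:

from PIL import Image

def _mean_brightness(im: Image.Image) -> float:
    # Average luma in [0, 255]; the diff rolls back when this drops below 12.
    g = im.convert("L")
    hist = g.histogram()
    return sum(i * c for i, c in enumerate(hist)) / max(1, sum(hist))

def _blend_pose_canvases(a: Image.Image, b: Image.Image, alpha: float) -> Image.Image:
    # alpha=0.20 keeps 80% of the original skeleton and nudges it toward the T-pose.
    return Image.blend(a.convert("RGB"), b.convert("RGB").resize(a.size), alpha)

def _lift_brightness(im: Image.Image, gain: float = 1.2, gamma: float = 0.88) -> Image.Image:
    # Gamma-then-gain lift on the RGB channels, preserving alpha.
    r, g, b, a = im.convert("RGBA").split()
    lut = [min(255, int(255 * ((i / 255) ** gamma) * gain)) for i in range(256)]
    out = Image.merge("RGB", (r.point(lut), g.point(lut), b.point(lut)))
    out.putalpha(a)
    return out

Under these assumptions, the _mean_brightness(out) < 16 gate fires only on a nearly black frame (mean luma under roughly 6% of full scale), which is why the code first tries a brightness lift and keeps the original only if the result is still below 12.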