aptol committed (verified)
Commit 70e511b · 1 Parent(s): b69bbf2

Update app.py

Files changed (1):
  1. app.py +144 -42
app.py CHANGED
@@ -691,51 +691,153 @@ def step1_gpu_refine(
 
     # ---- T-pose (ControlNet/OpenPose)
     if enforce_tpose:
-        from diffusers import ControlNetModel, StableDiffusionControlNetImg2ImgPipeline
-        controlnet = ControlNetModel.from_pretrained(
-            "lllyasviel/control_v11p_sd15_openpose",
-            torch_dtype=dtype
-        )
-        pipe_pose = StableDiffusionControlNetImg2ImgPipeline.from_pretrained(
-            "runwayml/stable-diffusion-v1-5",
-            controlnet=controlnet,
-            torch_dtype=dtype,
-            safety_checker=None,
-            feature_extractor=None,
-        )
-        pipe_pose = _disable_safety(pipe_pose)
-
-        try: pipe_pose.scheduler = EulerAncestralDiscreteScheduler.from_config(pipe_pose.scheduler.config)
-        except: pass
-        if dev == "cuda": pipe_pose.to("cuda")
-
-        img_rgb = _resize_to_multiple(img.convert("RGB"), multiple=8, max_side=512)
-        pose_orig  = _openpose_canvas_from_image(img_rgb)
-        pose_tpose = _draw_tpose_openpose_canvas(size=min(img_rgb.size))  # existing T-pose guide
-        pose_canvas= _blend_pose_canvases(pose_orig, pose_tpose, alpha=0.4)
-
-        out = pipe_pose(
-            prompt="T-pose tendency, full body, same outfit and colors, clean anime lines, plain light background",
-            negative_prompt="extra limbs, deformed, melted face, distorted body, watermark, text, noisy",
-            image=img_rgb,
-            control_image=pose_canvas,
-            strength=float(tpose_strength),       # (clamped)
-            guidance_scale=float(tpose_guidance),
-            num_inference_steps=int(tpose_steps),
-            controlnet_conditioning_scale=0.30,   # ★ lowered
-            control_guidance_start=[0.0],
-            control_guidance_end=[0.50],          # ★ only up to the midpoint
-            guess_mode=True                       # ★ avoid over-controlling the background/fine regions
-        ).images[0]
-
-        img = out.convert("RGBA")
-        if _mean_brightness(img) < 12:
-            logs.append("T-pose result too dark → rolling back to original")
-            img = Image.open(s1_path).convert("RGBA")
-        else:
-            (OUT/"step1"/"dbg_pose_orig.png").write_bytes(pose_orig.tobytes()) if False else None
-            img.save(OUT/"step1"/"dbg_03_after_tpose.png")
-            logs.append("Applied ControlNet(OpenPose) T-pose (blended)")
+        try:
+            from diffusers import (
+                ControlNetModel,
+                StableDiffusionControlNetImg2ImgPipeline,
+                DPMSolverMultistepScheduler
+            )
+            import torch, math
+
+            dev   = "cuda" if torch.cuda.is_available() else "cpu"
+            dtype = torch.float16 if dev == "cuda" else torch.float32
+
+            # 1) Load the ControlNet
+            controlnet = ControlNetModel.from_pretrained(
+                "lllyasviel/control_v11p_sd15_openpose",
+                torch_dtype=dtype
+            )
+            pipe_pose = StableDiffusionControlNetImg2ImgPipeline.from_pretrained(
+                "runwayml/stable-diffusion-v1-5",
+                controlnet=controlnet,
+                torch_dtype=dtype,
+                safety_checker=None,
+                feature_extractor=None,
+            )
+            # Disable the safety checker entirely (our own utility)
+            pipe_pose = _disable_safety(pipe_pose)
+
+            # More stable Karras DPM-Solver
+            try:
+                pipe_pose.scheduler = DPMSolverMultistepScheduler.from_config(
+                    pipe_pose.scheduler.config,
+                    use_karras_sigmas=True
+                )
+            except Exception:
+                pass
+
+            if dev == "cuda": pipe_pose.to("cuda")
+            try:
+                pipe_pose.enable_vae_slicing()
+            except Exception:
+                pass
+
+            # 2) Prepare input/pose (unified resolution, multiples of 8)
+            base_rgb = _resize_to_multiple(img.convert("RGB"), multiple=8, max_side=512)
+
+            # Extract the original pose and blend it weakly (0.2) with the T-pose
+            pose_orig  = _openpose_canvas_from_image(base_rgb)   # original pose (skeleton)
+            pose_t     = _make_tpose_canvas_like(base_rgb)       # our hand-drawn T-pose canvas
+            pose_canvas= _blend_pose_canvases(pose_orig, pose_t, alpha=0.20).resize(base_rgb.size)
+
+            # Lines that are too thin make convergence wobble, so reinforce them
+            try:
+                from PIL import ImageDraw
+                pc   = pose_canvas.copy()
+                draw = ImageDraw.Draw(pc)
+                # Reinforce the border (add a white frame)
+                w,h = pc.size
+                draw.rectangle([2,2,w-3,h-3], outline=(255,255,255), width=2)
+                pose_canvas = pc
+            except Exception:
+                pass
+
+            # 3) Prompts (bright, simple background + NSFW-blocking terms)
+            POS = (
+                "T-pose tendency, full body, same outfit and colors, clean anime lines, "
+                "consistent scale, white studio background, bright, high-key lighting"
+            )
+            NEG = (
+                "black background, low-key lighting, extra limbs, extra fingers, deformed hands, "
+                "melted face, distorted body, nsfw, cleavage, underwear, bikini, watermark, text, noisy"
+            )
+
+            # 4) Very low ControlNet influence + a short application window
+            #    cond_scale schedule: taper slightly from 0.22 early to 0.18 late
+            #    (reference helper; the two-call split below approximates it)
+            def cond_scale_by_step(step, total):
+                start, end = 0.22, 0.18
+                t = step / max(1, total-1)
+                return end + (start - end) * (1.0 - t)  # linear decay
+
+            steps    = int(max(14, min(28, int(tpose_steps))))
+            strength = float(max(0.45, min(0.65, float(tpose_strength))))
+            guidance = float(max(7.0, min(9.5, float(tpose_guidance))))
+
+            # diffusers does not directly support a per-step cond_scale schedule, so
+            # emulate it with two calls: strong over [0~0.2], weak over [0.2~0.35]
+            def run_pose(_img_rgb, scale, start, end, n_steps):
+                return pipe_pose(
+                    prompt=POS,
+                    negative_prompt=NEG,
+                    image=_img_rgb,
+                    control_image=pose_canvas,
+                    strength=strength,
+                    guidance_scale=guidance,
+                    num_inference_steps=n_steps,
+                    controlnet_conditioning_scale=scale,
+                    control_guidance_start=[start],
+                    control_guidance_end=[end],
+                    guess_mode=True
+                ).images[0].convert("RGBA")
+
+            # Pass 1: early guidance (0.05~0.20, scale 0.22)
+            out_a = run_pose(base_rgb, scale=0.22, start=0.05, end=0.20, n_steps=math.ceil(steps*0.55))
+            # Pass 2: mid-range finish (0.20~0.35, scale 0.18); the input is the pass-1 result
+            inter_rgb = _resize_to_multiple(out_a.convert("RGB"), multiple=8, max_side=512)
+            out_b = run_pose(inter_rgb, scale=0.18, start=0.20, end=0.35, n_steps=steps - math.ceil(steps*0.55))
+
+            out = out_b
+
+            # 5) Face protection (restore only the face from the original): simple brightness/color-based box estimate
+            try:
+                # Estimate the face box without MediaPipe (roughly the top-center 30~35%)
+                W,H = out.size
+                cx, cy = W//2, int(H*0.30)
+                bw, bh = int(W*0.36), int(H*0.28)
+                x1, y1 = max(0, cx-bw//2), max(0, cy-bh//2)
+                x2, y2 = min(W, cx+bw//2), min(H, cy+bh//2)
+
+                face_new = out.crop((x1,y1,x2,y2))
+                face_old = Image.open(s1_path).convert("RGBA").resize((W,H), Image.LANCZOS).crop((x1,y1,x2,y2))
+
+                # Overlay naturally with a soft mask
+                import numpy as np
+                m = Image.new("L", (x2-x1, y2-y1), 0)
+                from PIL import ImageFilter
+                # Elliptical mask
+                mm = Image.new("L", m.size, 0)
+                draw = ImageDraw.Draw(mm)
+                draw.ellipse([4,4,mm.size[0]-5,mm.size[1]-5], fill=255)
+                mm = mm.filter(ImageFilter.GaussianBlur(6))
+                face_mix = Image.composite(face_old, face_new, mm)
+                out.paste(face_mix, (x1,y1), mm)
+            except Exception:
+                pass
+
+            # 6) If the result is too dark, lift brightness; roll back to the original on failure
+            if _mean_brightness(out) < 16:
+                out = _lift_brightness(out, gain=1.20, gamma=0.88)
+            if _mean_brightness(out) < 12:
+                logs.append("T-pose (SafeMode) result too dark → keeping original")
+            else:
+                img = out
+                img.save(OUT/"step1"/"dbg_03_after_tpose.png")
+                pose_canvas.save(OUT/"step1"/"dbg_pose_safemode.png")
+                logs.append("Applied T-pose (SafeMode): 80% original + 20% T-pose, cond_scale↓, Karras DPM, face protection")
+
+        except Exception as e:
+            logs.append(f"T-pose ControlNet failed (SafeMode): {e}")
+
+
841
 
842
 
843
  # ---- (옵션) 리드로우(img2img)
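
The SafeMode branch relies on several small helpers (_mean_brightness, _blend_pose_canvases, _lift_brightness) that are defined elsewhere in app.py and do not appear in this hunk. The following is a minimal sketch of what such helpers could look like, assuming plain PIL images throughout; these are hypothetical stand-ins, not the repo's actual implementations:

from PIL import Image

def _mean_brightness(im: Image.Image) -> float:
    # Average luma in [0, 255]; the diff rolls back when this drops below 12.
    g = im.convert("L")
    hist = g.histogram()
    return sum(i * c for i, c in enumerate(hist)) / max(1, sum(hist))

def _blend_pose_canvases(a: Image.Image, b: Image.Image, alpha: float) -> Image.Image:
    # alpha=0.20 keeps 80% of the original skeleton and nudges it toward the T-pose.
    return Image.blend(a.convert("RGB"), b.convert("RGB").resize(a.size), alpha)

def _lift_brightness(im: Image.Image, gain: float = 1.2, gamma: float = 0.88) -> Image.Image:
    # Gamma-then-gain lift on the RGB channels, preserving alpha.
    r, g, b, a = im.convert("RGBA").split()
    lut = [min(255, int(255 * ((i / 255) ** gamma) * gain)) for i in range(256)]
    out = Image.merge("RGB", (r.point(lut), g.point(lut), b.point(lut)))
    out.putalpha(a)
    return out

Under these assumptions, the _mean_brightness(out) < 16 gate fires only on a nearly black frame (mean luma under roughly 6% of full scale), which is why the code first tries a brightness lift and keeps the original only if the result is still below 12.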