Update app.py
Browse files
app.py
CHANGED
|
@@ -267,13 +267,35 @@ def _pick_glb(res: Any) -> Optional[str]:
|
|
| 267 |
# STEP1: BG remove / WEAPON remove / T-POSE / Redraw
|
| 268 |
# ---------------------------------
|
| 269 |
def _remove_bg(img: Image.Image) -> Image.Image:
|
| 270 |
-
|
|
|
|
|
|
|
|
|
|
| 271 |
try:
|
| 272 |
-
|
| 273 |
-
|
| 274 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 275 |
except Exception:
|
| 276 |
-
return img
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 277 |
|
| 278 |
# ---- Weapon remove (DINO -> SAM -> LaMa/OpenCV)
|
| 279 |
_DINO_MODEL = None
|
|
@@ -520,9 +542,55 @@ def step1_cpu(img, keep_rembg, do_weaponless, weapon_terms):
|
|
| 520 |
base = _weaponless_pipeline(base, weapon_terms, logs)
|
| 521 |
except Exception as e:
|
| 522 |
logs.append(f"무기 제거 실패: {e}")
|
| 523 |
-
|
| 524 |
out_path = _save_png(base, OUT / "step1" / "input_preprocessed.png")
|
| 525 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 526 |
|
| 527 |
|
| 528 |
@spaces.GPU(duration=600) # ← ZeroGPU 환경: 여기서만 CUDA/모델 로딩 허용
|
|
@@ -538,6 +606,18 @@ def step1_gpu_refine(
|
|
| 538 |
redraw_strength = max(0.25, min(0.5, float(redraw_strength)))
|
| 539 |
redraw_steps = int(max(12, min(28, int(redraw_steps))))
|
| 540 |
redraw_guidance = max(5.0, min(9.0, float(redraw_guidance)))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 541 |
|
| 542 |
"""GPU 단계: ControlNet(OpenPose)로 T-포즈 강제 + img2img 리드로우"""
|
| 543 |
logs = []
|
|
@@ -545,58 +625,59 @@ def step1_gpu_refine(
|
|
| 545 |
raise gr.Error("STEP1 이미지가 없습니다. 먼저 STEP1(CPU)을 실행하세요.")
|
| 546 |
img = Image.open(s1_path).convert("RGBA")
|
| 547 |
|
|
|
|
| 548 |
# ---- T-포즈 (ControlNet/OpenPose)
|
| 549 |
if enforce_tpose:
|
| 550 |
try:
|
| 551 |
from diffusers import ControlNetModel, StableDiffusionControlNetImg2ImgPipeline
|
| 552 |
import torch
|
|
|
|
| 553 |
dev = "cuda" if torch.cuda.is_available() else "cpu"
|
| 554 |
-
|
| 555 |
"lllyasviel/control_v11p_sd15_openpose",
|
| 556 |
-
torch_dtype=
|
| 557 |
)
|
|
|
|
| 558 |
pipe = StableDiffusionControlNetImg2ImgPipeline.from_pretrained(
|
| 559 |
"runwayml/stable-diffusion-v1-5",
|
| 560 |
-
controlnet=
|
| 561 |
-
torch_dtype=
|
| 562 |
-
)
|
| 563 |
-
|
| 564 |
-
|
| 565 |
-
|
|
|
|
|
|
|
| 566 |
img = pipe(
|
| 567 |
prompt="T-pose, full body, clean anime lines",
|
| 568 |
-
image=
|
| 569 |
control_image=pose_canvas,
|
| 570 |
strength=float(tpose_strength),
|
| 571 |
guidance_scale=float(tpose_guidance),
|
| 572 |
-
num_inference_steps=int(tpose_steps)
|
|
|
|
| 573 |
).images[0].convert("RGBA")
|
| 574 |
-
|
| 575 |
except Exception as e:
|
| 576 |
-
logs.append(f"T-포즈
|
|
|
|
| 577 |
|
| 578 |
# ---- img2img 리드로우 (옵션)
|
|
|
|
| 579 |
if do_redraw_flag:
|
| 580 |
try:
|
| 581 |
-
|
| 582 |
-
import torch
|
| 583 |
-
dev = "cuda" if torch.cuda.is_available() else "cpu"
|
| 584 |
-
pipe = StableDiffusionImg2ImgPipeline.from_pretrained(
|
| 585 |
-
"runwayml/stable-diffusion-v1-5",
|
| 586 |
-
torch_dtype=(torch.float16 if dev == "cuda" else torch.float32)
|
| 587 |
-
)
|
| 588 |
-
if dev == "cuda":
|
| 589 |
-
pipe.to("cuda")
|
| 590 |
img = pipe(
|
| 591 |
prompt="clean anime illustration, sharp lines, simple solid background",
|
| 592 |
-
image=
|
| 593 |
strength=float(redraw_strength),
|
| 594 |
guidance_scale=float(redraw_guidance),
|
| 595 |
-
num_inference_steps=int(redraw_steps)
|
|
|
|
| 596 |
).images[0].convert("RGBA")
|
| 597 |
-
logs.append("img2img 리드로우 적용")
|
| 598 |
except Exception as e:
|
| 599 |
-
logs.append(f"
|
|
|
|
| 600 |
|
| 601 |
out_path = _save_png(img, OUT / "step1" / "input_preprocessed.png")
|
| 602 |
return [(out_path, "refined")], out_path, "\n".join(logs)
|
|
|
|
| 267 |
# STEP1: BG remove / WEAPON remove / T-POSE / Redraw
|
| 268 |
# ---------------------------------
|
| 269 |
def _remove_bg(img: Image.Image) -> Image.Image:
    """Strip the background with rembg, falling back to the input on failure.

    The image is handed to rembg as PNG bytes and the returned bytes are
    decoded back into an RGBA image. If rembg raises, or if the result is
    fully transparent (its alpha channel has no bounding box), the input
    image converted to RGBA is returned instead.
    """
    try:
        from rembg import remove

        # Encode to PNG in memory so rembg receives bytes and returns bytes.
        encoded = io.BytesIO()
        img.convert("RGBA").save(encoded, format="PNG")
        cutout_bytes = remove(encoded.getvalue())
        cutout = Image.open(io.BytesIO(cutout_bytes)).convert("RGBA")

        # getbbox() is None only when every alpha value is zero.
        has_visible_pixels = cutout.getchannel("A").getbbox() is not None
    except Exception:
        return img.convert("RGBA")

    if has_visible_pixels:
        return cutout
    # Everything was erased: treat that as a failed removal.
    return img.convert("RGBA")
|
| 289 |
+
def _to_preview(img: Image.Image, bg=(40, 40, 40)) -> Image.Image:
    """Flatten an RGBA image onto a solid backdrop for display.

    Images in any other mode are simply converted to RGB. The input image is
    not modified; a new RGB image is returned in both cases.
    """
    if img.mode == "RGBA":
        backdrop = Image.new("RGB", img.size, bg)
        # The last band of an RGBA image is its alpha channel; use it as the mask.
        *_, alpha = img.split()
        backdrop.paste(img, mask=alpha)
        return backdrop
    return img.convert("RGB")
|
| 299 |
|
| 300 |
# ---- Weapon remove (DINO -> SAM -> LaMa/OpenCV)
|
| 301 |
_DINO_MODEL = None
|
|
|
|
| 542 |
base = _weaponless_pipeline(base, weapon_terms, logs)
|
| 543 |
except Exception as e:
|
| 544 |
logs.append(f"무기 제거 실패: {e}")
|
| 545 |
+
# 전처리 끝난 후
|
| 546 |
out_path = _save_png(base, OUT / "step1" / "input_preprocessed.png")
|
| 547 |
+
|
| 548 |
+
# ✅ 갤러리는 합성된 미리보기로
|
| 549 |
+
preview = _to_preview(base)
|
| 550 |
+
|
| 551 |
+
# 기존 return 교체
|
| 552 |
+
return [preview], out_path, "\n".join(logs)
|
| 553 |
+
|
| 554 |
+
def _resize_to_multiple(img: Image.Image, multiple: int = 8, max_side: int = 768) -> Image.Image:
    """Cap the longest side at *max_side*, then floor both sides to a multiple.

    The image is never upscaled by the cap step. Each side is floored to a
    multiple of *multiple* independently (with a minimum of one multiple), so
    the aspect ratio is preserved only approximately. The input is returned
    unchanged when it already has the target size; otherwise a bicubic-resized
    copy is returned.

    Args:
        img: Source image.
        multiple: Both output sides are multiples of this value.
        max_side: Upper bound for the longest output side (truncated to int).

    Raises:
        ValueError: If the image has no pixels (both sides are 0).
    """
    src_w, src_h = img.size
    longest = max(src_w, src_h)
    if longest <= 0:
        # Previously surfaced as an opaque ZeroDivisionError.
        raise ValueError(f"cannot resize an empty image (size={img.size})")

    # 1) Cap the longest side.
    # Exact integer arithmetic is used on purpose: the float form
    # int(w * (max_side / longest)) can land one pixel short (a 1176px side
    # scaled to 768 yielded 767), and the floor in step 2 then turned that
    # single pixel into a whole lost multiple (760 instead of 768).
    limit = int(max_side)
    if longest > limit:
        new_w = src_w * limit // longest
        new_h = src_h * limit // longest
    else:
        new_w, new_h = src_w, src_h

    # 2) Floor to the requested multiple, never below one multiple.
    new_w = max(multiple, (new_w // multiple) * multiple)
    new_h = max(multiple, (new_h // multiple) * multiple)

    if (new_w, new_h) != img.size:
        img = img.resize((new_w, new_h), Image.BICUBIC)
    return img
|
| 566 |
+
|
| 567 |
+
def _make_tpose_canvas_like(img: Image.Image) -> Image.Image:
    """Build a black canvas the size of *img* with a white T-pose stick figure.

    The figure is drawn inside a square whose side is the shorter image side,
    and that square is pasted centred on the full-size canvas.
    """
    width, height = img.size
    side = min(width, height)
    stroke = 10        # line / outline thickness in pixels
    joint_radius = 8   # radius of the filled joint dots in pixels

    guide = Image.new("RGB", (side, side), "black")
    pen = ImageDraw.Draw(guide)

    # All landmarks are fixed fractions of the square's side.
    center_x = side // 2
    center_y = int(side * 0.58)
    arm_reach = int(side * 0.36)
    leg_drop = int(side * 0.36)
    head_radius = int(side * 0.06)

    spine_top_y = center_y - int(side * 0.28)
    hip_y = center_y + int(side * 0.04)
    shoulder_y = center_y - int(side * 0.22)
    head_center_y = shoulder_y - int(side * 0.18)
    foot_spread = int(leg_drop * 0.65)
    foot_y = center_y + leg_drop

    # Spine
    pen.line([(center_x, spine_top_y), (center_x, hip_y)], fill="white", width=stroke)

    # Arms: one horizontal bar through the shoulder line
    pen.line(
        [(center_x - arm_reach, shoulder_y), (center_x + arm_reach, shoulder_y)],
        fill="white",
        width=stroke,
    )

    # Legs: left first, then right, both starting at the hip
    for direction in (-1, 1):
        pen.line(
            [(center_x, hip_y), (center_x + direction * foot_spread, foot_y)],
            fill="white",
            width=stroke,
        )

    # Head: outlined circle above the shoulder line
    pen.ellipse(
        [
            (center_x - head_radius, head_center_y - head_radius),
            (center_x + head_radius, head_center_y + head_radius),
        ],
        outline="white",
        width=stroke,
    )

    # Joints: filled dots drawn last so they sit on top of the lines
    joints = (
        (center_x, shoulder_y),
        (center_x - arm_reach, shoulder_y),
        (center_x + arm_reach, shoulder_y),
        (center_x, center_y),
        (center_x, hip_y),
    )
    for joint_x, joint_y in joints:
        pen.ellipse(
            [
                (joint_x - joint_radius, joint_y - joint_radius),
                (joint_x + joint_radius, joint_y + joint_radius),
            ],
            fill="white",
        )

    # Centre the square guide on a canvas matching the input resolution.
    canvas = Image.new("RGB", (width, height), "black")
    canvas.paste(guide, ((width - side) // 2, (height - side) // 2))
    return canvas
|
| 594 |
|
| 595 |
|
| 596 |
@spaces.GPU(duration=600) # ← ZeroGPU 환경: 여기서만 CUDA/모델 로딩 허용
|
|
|
|
| 606 |
redraw_strength = max(0.25, min(0.5, float(redraw_strength)))
|
| 607 |
redraw_steps = int(max(12, min(28, int(redraw_steps))))
|
| 608 |
redraw_guidance = max(5.0, min(9.0, float(redraw_guidance)))
|
| 609 |
+
# 입력 이미지와 포즈 캔버스를 같은 해상도(8의 배수)로 맞추기
|
| 610 |
+
img_rgb = img.convert("RGB")
|
| 611 |
+
img_rgb = _resize_to_multiple(img_rgb, multiple=8, max_side=768)
|
| 612 |
+
pose_canvas = _make_tpose_canvas_like(img_rgb) # 입력과 동일 해상도
|
| 613 |
+
|
| 614 |
+
# (선택) 시드 고정 원하면:
|
| 615 |
+
# generator = None
|
| 616 |
+
# try:
|
| 617 |
+
# import torch
|
| 618 |
+
# generator = torch.Generator(device="cuda" if torch.cuda.is_available() else "cpu").manual_seed(0)
|
| 619 |
+
# except Exception:
|
| 620 |
+
# pass
|
| 621 |
|
| 622 |
"""GPU 단계: ControlNet(OpenPose)로 T-포즈 강제 + img2img 리드로우"""
|
| 623 |
logs = []
|
|
|
|
| 625 |
raise gr.Error("STEP1 이미지가 없습니다. 먼저 STEP1(CPU)을 실행하세요.")
|
| 626 |
img = Image.open(s1_path).convert("RGBA")
|
| 627 |
|
| 628 |
+
# ---- T-포즈 (ControlNet/OpenPose)
|
| 629 |
# ---- T-포즈 (ControlNet/OpenPose)
|
| 630 |
if enforce_tpose:
|
| 631 |
try:
|
| 632 |
from diffusers import ControlNetModel, StableDiffusionControlNetImg2ImgPipeline
|
| 633 |
import torch
|
| 634 |
+
|
| 635 |
dev = "cuda" if torch.cuda.is_available() else "cpu"
|
| 636 |
+
controlnet = ControlNetModel.from_pretrained(
|
| 637 |
"lllyasviel/control_v11p_sd15_openpose",
|
| 638 |
+
torch_dtype=torch.float16 if dev == "cuda" else torch.float32
|
| 639 |
)
|
| 640 |
+
|
| 641 |
pipe = StableDiffusionControlNetImg2ImgPipeline.from_pretrained(
|
| 642 |
"runwayml/stable-diffusion-v1-5",
|
| 643 |
+
controlnet=controlnet,
|
| 644 |
+
torch_dtype=torch.float16 if dev == "cuda" else torch.float32
|
| 645 |
+
).to(dev)
|
| 646 |
+
|
| 647 |
+
# ✅ 여기서 리사이즈 & 포즈 캔버스 생성
|
| 648 |
+
img_rgb = _resize_to_multiple(img.convert("RGB"), multiple=8, max_side=768)
|
| 649 |
+
pose_canvas = _make_tpose_canvas_like(img_rgb)
|
| 650 |
+
|
| 651 |
img = pipe(
|
| 652 |
prompt="T-pose, full body, clean anime lines",
|
| 653 |
+
image=img_rgb,
|
| 654 |
control_image=pose_canvas,
|
| 655 |
strength=float(tpose_strength),
|
| 656 |
guidance_scale=float(tpose_guidance),
|
| 657 |
+
num_inference_steps=int(tpose_steps),
|
| 658 |
+
# generator=generator,
|
| 659 |
).images[0].convert("RGBA")
|
| 660 |
+
|
| 661 |
except Exception as e:
|
| 662 |
+
logs.append(f"T-포즈 실패: {e}")
|
| 663 |
+
|
| 664 |
|
| 665 |
# ---- img2img 리드로우 (옵션)
|
| 666 |
+
# ---- 리드로우
|
| 667 |
if do_redraw_flag:
|
| 668 |
try:
|
| 669 |
+
img_for_redraw = _resize_to_multiple(img.convert("RGB"), multiple=8, max_side=768)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 670 |
img = pipe(
|
| 671 |
prompt="clean anime illustration, sharp lines, simple solid background",
|
| 672 |
+
image=img_for_redraw,
|
| 673 |
strength=float(redraw_strength),
|
| 674 |
guidance_scale=float(redraw_guidance),
|
| 675 |
+
num_inference_steps=int(redraw_steps),
|
| 676 |
+
# generator=generator,
|
| 677 |
).images[0].convert("RGBA")
|
|
|
|
| 678 |
except Exception as e:
|
| 679 |
+
logs.append(f"리드로우 실패: {e}")
|
| 680 |
+
|
| 681 |
|
| 682 |
out_path = _save_png(img, OUT / "step1" / "input_preprocessed.png")
|
| 683 |
return [(out_path, "refined")], out_path, "\n".join(logs)
|