|
|
# Gradio demo comparing Value Sign Flip (VSF) and Normalized Attention Guidance
# (NAG) negative-prompt guidance on SD3.5-Large-Turbo.

# On Hugging Face Spaces, `spaces` provides the GPU decorator; fall back to a
# no-op stub so the script also runs outside Spaces.
try:
    import spaces
except ImportError:
    class spaces:
        @staticmethod
        def GPU(fn):
            return fn

import json
import os
import uuid

import gradio as gr
import numpy as np
import torch

model_id = "stabilityai/stable-diffusion-3.5-large-turbo"

# Value Sign Flip (VSF) pipeline defined in this repo.
from src.sd3_pipeline import VSFStableDiffusion3Pipeline

pipe = VSFStableDiffusion3Pipeline.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,
    hf_token=os.environ.get("HF_TOKEN", None),
)

# Normalized Attention Guidance (NAG) pipeline, used as the comparison baseline.
from nag import NAGStableDiffusion3Pipeline

nag_pipe = NAGStableDiffusion3Pipeline.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,
    token=os.environ.get("HF_TOKEN", None),
)
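
# Assumption: a CUDA device is available at generation time (the generators
# below are created on "cuda"), so move both pipelines there up front. On
# ZeroGPU Spaces this is the usual pattern alongside the @spaces.GPU decorator.
pipe.to("cuda")
nag_pipe.to("cuda")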
|
|
@spaces.GPU
def generate_images(positive_prompt, negative_prompt, guidance_scale, bias, step, seed,
                    nag_guidance, nag_alpha, nag_tau, nag_step,
                    progress=gr.Progress(track_tqdm=True)):
    """Generate one image with the VSF pipeline and one with the NAG baseline."""
    print(f"Generating image with params: {positive_prompt}, {negative_prompt}, {guidance_scale}, {bias}, {step}")

    # VSF pass. guidance_scale=0.0 disables standard CFG (SD3.5-Large-Turbo is
    # distilled for CFG-free sampling); the negative prompt is handled by the
    # custom VSF pipeline.
    # NOTE: the UI's VSF guidance scale and bias are currently only logged above;
    # forward them here with the kwarg names defined in src/sd3_pipeline.py.
    output = pipe(
        prompt=positive_prompt,
        negative_prompt=negative_prompt,
        num_inference_steps=step,
        guidance_scale=0.0,
        generator=torch.Generator(device="cuda").manual_seed(seed),
    ).images[0]

    os.makedirs("images", exist_ok=True)
    output_path = f"images/{uuid.uuid4().hex}.png"
    output.save(output_path)
    print(f"Image saved to {output_path}")

    # NAG pass with the same prompts and the same seed so the two results are
    # directly comparable.
    output_nag = nag_pipe(
        prompt=positive_prompt,
        negative_prompt=negative_prompt,
        num_inference_steps=nag_step,
        nag_scale=nag_guidance,
        nag_alpha=nag_alpha,
        nag_tau=nag_tau,
        guidance_scale=0.0,
        generator=torch.Generator(device="cuda").manual_seed(seed),
    ).images[0]

    nag_path = f"images/{uuid.uuid4().hex}_nag.png"
    output_nag.save(nag_path)
    print(f"NAG image saved to {nag_path}")

    return output_path, nag_path


with open("sample_prompts.json", "r") as f:
    sample_prompts = json.load(f)


def load_sample():
    sample = np.random.choice(sample_prompts)
    return sample['prompt'], sample['missing_element']

|
with open("anti_aesthetics.json", "r") as f: |
|
|
anti_aesthetics_prompts = json.load(f) |
|
|
|
|
|
def load_anti_aesthetics_sample(): |
|
|
sample = np.random.choice(anti_aesthetics_prompts) |
|
|
return sample['prompt'], sample['missing_element'] |
|
|
|
|
|
nouns = ["cat", "dog", "car", "bicycle", "tree", "house", "computer", "phone", "book", "chair", "table", "lamp", "flower", "mountain", "river", "ocean", "cloud", "bird", "fish", "butterfly"]
methods = ["painting", "sketch", "drawing"]


def load_abstract_prompt():
    # Assemble an abstract-art prompt and use the noun itself as the negative prompt.
    noun = np.random.choice(nouns)
    method = np.random.choice(methods)
    prompt = f"An abstract {method} of a {noun}."
    negative = noun
    return prompt, negative


with gr.Blocks(title="Value Sign Flip SD3.5 Demo") as demo:
    gr.Markdown("# Value Sign Flip SD3.5 Demo \n\n This demo is based on the SD3.5-Large-Turbo model and uses the Value Sign Flip technique to generate images under negative-prompt guidance with different guidance scales and biases. More on [GitHub](https://github.com/weathon/VSF/blob/main/wan.md)\n\nThe positive prompt should be at least one full sentence, otherwise the results will look odd.")

    with gr.Row():
        pos = gr.Textbox(label="Positive Prompt", value="A polished bicycle frame leans against a weathered brick wall under soft morning light.")
        neg = gr.Textbox(label="Negative Prompt", value="wheels")

    with gr.Column():
        sample = gr.Button("Load A Sample Prompt")
        sample.click(fn=load_sample, inputs=[], outputs=[pos, neg])
        anti_aesthetic_sample = gr.Button("Load An Anti-Aesthetic Sample Prompt")
        anti_aesthetic_sample.click(fn=load_anti_aesthetics_sample, inputs=[], outputs=[pos, neg])
        abstract_sample = gr.Button("Load An Abstract Prompt")
        abstract_sample.click(fn=load_abstract_prompt, inputs=[], outputs=[pos, neg])

    with gr.Row():
        gr.Markdown("## VSF Generation Parameters")
        guidance = gr.Slider(0, 5, step=0.1, label="Guidance Scale", value=3.0)
        bias = gr.Slider(0, 0.5, step=0.01, label="Bias", value=0.1)
        step = gr.Slider(4, 15, step=1, label="Step", value=8)
        seed = gr.Number(label="Seed", value=0, precision=0)
        set_strong_vsf = gr.Button("Set to VSF Strong Settings")
        set_strong_vsf.click(fn=lambda: (3.8, 0.2), inputs=[], outputs=[guidance, bias])
        set_mild_vsf = gr.Button("Set to VSF Quality Settings")
        set_mild_vsf.click(fn=lambda: (3.3, 0.2), inputs=[], outputs=[guidance, bias])

    with gr.Row():
        gr.Markdown("## NAG Generation Parameters")
        nag_guidance = gr.Slider(1, 10, step=0.1, label="Guidance Scale", value=5)
        nag_alpha = gr.Slider(0.1, 1.0, step=0.01, label="Alpha", value=0.25)
        nag_tau = gr.Slider(1, 10, step=0.01, label="Tau", value=3.0)
        nag_step = gr.Slider(4, 15, step=1, label="Step", value=8)
        set_strong = gr.Button("Set to NAG Strong Settings")
        set_strong.click(fn=lambda: (11, 0.5, 5.0), inputs=[], outputs=[nag_guidance, nag_alpha, nag_tau])
        set_mild = gr.Button("Set to NAG Quality Settings")
        set_mild.click(fn=lambda: (4, 0.125, 2.5), inputs=[], outputs=[nag_guidance, nag_alpha, nag_tau])

    with gr.Row():
        vsf_out = gr.Image(label="VSF Generated Image")
        nag_out = gr.Image(label="NAG Generated Image")

    btn = gr.Button("Generate")
    btn.click(fn=generate_images, inputs=[pos, neg, guidance, bias, step, seed, nag_guidance, nag_alpha, nag_tau, nag_step], outputs=[vsf_out, nag_out])

demo.launch(share=True)