|
|
# Gradio demo comparing Value Sign Flip (VSF) and Normalized Attention Guidance
# (NAG) negative-prompt guidance on SD3.5-Large-Turbo.

# On Hugging Face Spaces, `spaces` provides the GPU decorator; fall back to a
# no-op stub so the script also runs outside Spaces.
try:
    import spaces
except ImportError:
    class spaces:
        @staticmethod
        def GPU(fn):
            return fn

import json
import os
import uuid

import gradio as gr
import numpy as np
import torch

model_id = "stabilityai/stable-diffusion-3.5-large-turbo"

# Value Sign Flip (VSF) pipeline defined in this repo.
from src.sd3_pipeline import VSFStableDiffusion3Pipeline

pipe = VSFStableDiffusion3Pipeline.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,
    hf_token=os.environ.get("HF_TOKEN", None),
)

# Normalized Attention Guidance (NAG) pipeline, used as the comparison baseline.
from nag import NAGStableDiffusion3Pipeline

nag_pipe = NAGStableDiffusion3Pipeline.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,
    token=os.environ.get("HF_TOKEN", None),
)
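
# Assumption: a CUDA device is available at generation time (the generators
# below are created on "cuda"), so move both pipelines there up front. On
# ZeroGPU Spaces this is the usual pattern alongside the @spaces.GPU decorator.
pipe.to("cuda")
nag_pipe.to("cuda")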
|
|
@spaces.GPU
def generate_images(positive_prompt, negative_prompt, guidance_scale, bias, step, seed,
                    nag_guidance, nag_alpha, nag_tau, nag_step,
                    progress=gr.Progress(track_tqdm=True)):
    """Generate one image with the VSF pipeline and one with the NAG baseline."""
    print(f"Generating image with params: {positive_prompt}, {negative_prompt}, {guidance_scale}, {bias}, {step}")

    # VSF pass. guidance_scale=0.0 disables standard CFG (SD3.5-Large-Turbo is
    # distilled for CFG-free sampling); the negative prompt is handled by the
    # custom VSF pipeline.
    # NOTE: the UI's VSF guidance scale and bias are currently only logged above;
    # forward them here with the kwarg names defined in src/sd3_pipeline.py.
    output = pipe(
        prompt=positive_prompt,
        negative_prompt=negative_prompt,
        num_inference_steps=step,
        guidance_scale=0.0,
        generator=torch.Generator(device="cuda").manual_seed(seed),
    ).images[0]

    os.makedirs("images", exist_ok=True)
    output_path = f"images/{uuid.uuid4().hex}.png"
    output.save(output_path)
    print(f"Image saved to {output_path}")

    # NAG pass with the same prompts and the same seed so the two results are
    # directly comparable.
    output_nag = nag_pipe(
        prompt=positive_prompt,
        negative_prompt=negative_prompt,
        num_inference_steps=nag_step,
        nag_scale=nag_guidance,
        nag_alpha=nag_alpha,
        nag_tau=nag_tau,
        guidance_scale=0.0,
        generator=torch.Generator(device="cuda").manual_seed(seed),
    ).images[0]

    nag_path = f"images/{uuid.uuid4().hex}_nag.png"
    output_nag.save(nag_path)
    print(f"NAG image saved to {nag_path}")

    return output_path, nag_path


with open("sample_prompts.json", "r") as f:
    sample_prompts = json.load(f)


def load_sample():
    sample = np.random.choice(sample_prompts)
    return sample['prompt'], sample['missing_element']

|
with open("anti_aesthetics.json", "r") as f: |
|
|
anti_aesthetics_prompts = json.load(f) |
|
|
|
|
|
def load_anti_aesthetics_sample(): |
|
|
sample = np.random.choice(anti_aesthetics_prompts) |
|
|
return sample['prompt'], sample['missing_element'] |
|
|
|
|
|
nouns = ["cat", "dog", "car", "bicycle", "tree", "house", "computer", "phone", "book", "chair", "table", "lamp", "flower", "mountain", "river", "ocean", "cloud", "bird", "fish", "butterfly"]
methods = ["painting", "sketch", "drawing"]


def load_abstract_prompt():
    # Assemble an abstract-art prompt and use the noun itself as the negative prompt.
    noun = np.random.choice(nouns)
    method = np.random.choice(methods)
    prompt = f"An abstract {method} of a {noun}."
    negative = noun
    return prompt, negative


with gr.Blocks(title="Value Sign Flip SD3.5 Demo") as demo:
    gr.Markdown("# Value Sign Flip SD3.5 Demo \n\n This demo is based on the SD3.5-Large-Turbo model and uses the Value Sign Flip technique to generate images under negative-prompt guidance with different guidance scales and biases. More on [GitHub](https://github.com/weathon/VSF/blob/main/wan.md)\n\nThe positive prompt should be at least one full sentence, otherwise the results will look odd.")

    with gr.Row():
        pos = gr.Textbox(label="Positive Prompt", value="A polished bicycle frame leans against a weathered brick wall under soft morning light.")
        neg = gr.Textbox(label="Negative Prompt", value="wheels")

    with gr.Column():
        sample = gr.Button("Load A Sample Prompt")
        sample.click(fn=load_sample, inputs=[], outputs=[pos, neg])
        anti_aesthetic_sample = gr.Button("Load An Anti-Aesthetic Sample Prompt")
        anti_aesthetic_sample.click(fn=load_anti_aesthetics_sample, inputs=[], outputs=[pos, neg])
        abstract_sample = gr.Button("Load An Abstract Prompt")
        abstract_sample.click(fn=load_abstract_prompt, inputs=[], outputs=[pos, neg])

    with gr.Row():
        gr.Markdown("## VSF Generation Parameters")
        guidance = gr.Slider(0, 5, step=0.1, label="Guidance Scale", value=3.0)
        bias = gr.Slider(0, 0.5, step=0.01, label="Bias", value=0.1)
        step = gr.Slider(4, 15, step=1, label="Step", value=8)
        seed = gr.Number(label="Seed", value=0, precision=0)
        set_strong_vsf = gr.Button("Set to VSF Strong Settings")
        set_strong_vsf.click(fn=lambda: (3.8, 0.2), inputs=[], outputs=[guidance, bias])
        set_mild_vsf = gr.Button("Set to VSF Quality Settings")
        set_mild_vsf.click(fn=lambda: (3.3, 0.2), inputs=[], outputs=[guidance, bias])

    with gr.Row():
        gr.Markdown("## NAG Generation Parameters")
        nag_guidance = gr.Slider(1, 10, step=0.1, label="Guidance Scale", value=5)
        nag_alpha = gr.Slider(0.1, 1.0, step=0.01, label="Alpha", value=0.25)
        nag_tau = gr.Slider(1, 10, step=0.01, label="Tau", value=3.0)
        nag_step = gr.Slider(4, 15, step=1, label="Step", value=8)
        set_strong = gr.Button("Set to NAG Strong Settings")
        set_strong.click(fn=lambda: (11, 0.5, 5.0), inputs=[], outputs=[nag_guidance, nag_alpha, nag_tau])
        set_mild = gr.Button("Set to NAG Quality Settings")
        set_mild.click(fn=lambda: (4, 0.125, 2.5), inputs=[], outputs=[nag_guidance, nag_alpha, nag_tau])

    with gr.Row():
        vsf_out = gr.Image(label="VSF Generated Image")
        nag_out = gr.Image(label="NAG Generated Image")

    btn = gr.Button("Generate")
    btn.click(fn=generate_images, inputs=[pos, neg, guidance, bias, step, seed, nag_guidance, nag_alpha, nag_tau, nag_step], outputs=[vsf_out, nag_out])

demo.launch(share=True)