Qwen-Image-Edit-Angles

Running on Zero

App Files Files Community

Elea Zhong committed 19 days ago

Commit

9ac4ead

1 Parent(s): c4a6ce2

run train experiments

Browse files

Files changed (12) hide show

configs/compare/5k_steps.yaml +3 -0
configs/optim/accum-4.yaml +5 -0
configs/optim/cosine.yaml +3 -1
configs/regression/base.yaml +2 -0
configs/regression/lo_mse.yaml +3 -0
configs/regression/triplet/mse-triplet-f.yaml +9 -0
configs/regression/triplet/mse-triplet-g.yaml +9 -0
configs/regression/triplet/mse-triplet-h.yaml +9 -0
qwenimage/datamodels.py +16 -1
qwenimage/foundation.py +19 -10
qwenimage/training.py +1 -0
scripts/train_multi.sh +8 -25

configs/compare/5k_steps.yaml CHANGED Viewed

@@ -1,2 +1,5 @@
 num_train_epochs: 1
 max_train_steps: 5000

+name_suffix:
+  max_steps: 5000
 num_train_epochs: 1
 max_train_steps: 5000

configs/optim/accum-4.yaml ADDED Viewed

	@@ -0,0 +1,5 @@

+name_suffix:
+  accum: 4
+gradient_accumulation_steps: 4

configs/optim/cosine.yaml CHANGED Viewed

@@ -1,4 +1,6 @@
 lr_scheduler: cosine
-lr_warmup_steps: 250

+name_suffix:
+  lr: cosine
 lr_scheduler: cosine
+lr_warmup_steps: 50

configs/regression/base.yaml CHANGED Viewed

@@ -20,6 +20,8 @@ regression_gen_steps: 50
 editing_data_dir: "/data/CrispEdit"
 editing_total_per: 1
 validation_loss_terms:
   mse: 1.0

 editing_data_dir: "/data/CrispEdit"
 editing_total_per: 1
+gradient_checkpointing: true
+vae_tiling: false
 validation_loss_terms:
   mse: 1.0

configs/regression/lo_mse.yaml CHANGED Viewed

@@ -1,2 +1,5 @@
 train_loss_terms:
   mse: 0.1

+name_suffix:
+  mse: 0.1
 train_loss_terms:
   mse: 0.1

configs/regression/triplet/mse-triplet-f.yaml ADDED Viewed

	@@ -0,0 +1,9 @@

+wandb_run_name: "reg-mse-triplet-f"
+output_dir: "/data/checkpoints/reg-mse-triplet-f"
+train_loss_terms:
+  mse: 1.0
+  triplet: 1.0
+  triplet_margin: 0.0
+  triplet_min_abs_diff: 0.25

configs/regression/triplet/mse-triplet-g.yaml ADDED Viewed

	@@ -0,0 +1,9 @@

+wandb_run_name: "reg-mse-triplet-g"
+output_dir: "/data/checkpoints/reg-mse-triplet-g"
+train_loss_terms:
+  mse: 1.0
+  triplet: 1.0
+  triplet_margin: -0.1
+  triplet_min_abs_diff: 0.25

configs/regression/triplet/mse-triplet-h.yaml ADDED Viewed

	@@ -0,0 +1,9 @@

+wandb_run_name: "reg-mse-triplet-h"
+output_dir: "/data/checkpoints/reg-mse-triplet-h"
+train_loss_terms:
+  mse: 1.0
+  triplet: 1.0
+  triplet_margin: -0.1
+  triplet_min_abs_diff: 0.3

qwenimage/datamodels.py CHANGED Viewed

@@ -1,6 +1,6 @@
 import enum
 from pathlib import Path
-from typing import Literal
 import torch
 from diffusers.image_processor import PipelineImageInput
@@ -79,6 +79,7 @@ class QwenConfig(ExperimentTrainerParameters):
     offload_text_encoder: bool = True
     quantize_text_encoder: bool = False
     quantize_transformer: bool = False
     train_loss_terms:QwenLossTerms = Field(default_factory=QwenLossTerms)
@@ -103,4 +104,18 @@ class QwenConfig(ExperimentTrainerParameters):
     editing_total_per: int = 1
     regression_base_pipe_steps: int = 8

 import enum
 from pathlib import Path
+from typing import Any, Literal
 import torch
 from diffusers.image_processor import PipelineImageInput
     offload_text_encoder: bool = True
     quantize_text_encoder: bool = False
     quantize_transformer: bool = False
+    vae_tiling: bool = False
     train_loss_terms:QwenLossTerms = Field(default_factory=QwenLossTerms)
     editing_total_per: int = 1
     regression_base_pipe_steps: int = 8
+    name_suffix: dict[str,Any]|None = None
+    def add_suffix_to_names(self):
+        if self.name_suffix is None:
+            return
+        suffix_sum = ""
+        for suf_name,suf_val in self.name_suffix.items():
+            suffix_sum += "_" + suf_name
+            suf_val = str(suf_val)
+            suffix_sum += "_" + suf_val
+        self.run_name += suffix_sum
+        self.output_dir = self.output_dir.removesuffix("/") # in case
+        self.output_dir += suffix_sum

qwenimage/foundation.py CHANGED Viewed

@@ -81,7 +81,17 @@ class QwenImageFoundation(WandModel):
         self.text_encoder.requires_grad_(False)
         self.text_encoder_device = None
         self.transformer.eval()
         self.transformer.requires_grad_(False)
         self.timestep_dist_utils = TimestepDistUtils(
             min_seq_len=self.scheduler.config.base_image_seq_len,
@@ -419,7 +429,7 @@ class QwenImageRegressionFoundation(QwenImageFoundation):
             margin = loss_terms.triplet_margin
             triplet_min_abs_diff = loss_terms.triplet_min_abs_diff
             print(f"{triplet_min_abs_diff=}")
-            v_gt_neg_diff = (v_gt_1d - v_neg_1d).abs().mean(dim=2, keepdim=True)
             zero_weight = torch.zeros_like(v_gt_neg_diff)
             v_weight = torch.where(v_gt_neg_diff > triplet_min_abs_diff, v_gt_neg_diff, zero_weight)
             ones = torch.ones_like(v_gt_neg_diff)
@@ -431,12 +441,11 @@ class QwenImageRegressionFoundation(QwenImageFoundation):
             diffv_gt_pred = (v_gt_1d - v_pred_1d).pow(2)
             diffv_neg_pred = (v_neg_1d - v_pred_1d).pow(2)
-            loss_unreduced = diffv_gt_pred - diffv_neg_pred
-            loss_weighted = (loss_unreduced * v_weight).sum(dim=2)
-            triplet_loss = F.relu(loss_weighted + margin).mean()
-            ones = torch.ones_like(loss_weighted)
-            zeros = torch.zeros_like(loss_weighted)
-            loss_nonzero_nums = torch.sum(torch.where((loss_weighted + margin)>0, ones, zeros))
             wand_logger.log({
                 "loss_nonzero_nums": loss_nonzero_nums,
             }, commit=False)
@@ -447,8 +456,7 @@ class QwenImageRegressionFoundation(QwenImageFoundation):
             texam(v_weight, "v_weight")
             texam(diffv_gt_pred, "diffv_gt_pred")
             texam(diffv_neg_pred, "diffv_neg_pred")
-            texam(loss_unreduced, "loss_unreduced")
-            texam(loss_weighted, "loss_weighted")
@@ -467,7 +475,8 @@ class QwenImageRegressionFoundation(QwenImageFoundation):
         if loss_accumulator.has_group("pixel"):
             x_0_pred = x_t_1d - t * v_pred_1d
-            pixel_values_x0_gt = self.latents_to_pil(x_0_1d, h=h_f16, w=w_f16, with_grad=True).detach()
             pixel_values_x0_pred = self.latents_to_pil(x_0_pred, h=h_f16, w=w_f16, with_grad=True)
             if loss_accumulator.has("pixel_lpips"):

         self.text_encoder.requires_grad_(False)
         self.text_encoder_device = None
         self.transformer.eval()
         self.transformer.requires_grad_(False)
+        if self.config.gradient_checkpointing:
+            self.transformer.enable_gradient_checkpointing()
+        if self.config.vae_tiling:
+            self.vae.enable_tiling(
+                576,
+                576,
+                512,
+                512
+            )
         self.timestep_dist_utils = TimestepDistUtils(
             min_seq_len=self.scheduler.config.base_image_seq_len,
             margin = loss_terms.triplet_margin
             triplet_min_abs_diff = loss_terms.triplet_min_abs_diff
             print(f"{triplet_min_abs_diff=}")
+            v_gt_neg_diff = (v_gt_1d - v_neg_1d).abs().mean(dim=2)
             zero_weight = torch.zeros_like(v_gt_neg_diff)
             v_weight = torch.where(v_gt_neg_diff > triplet_min_abs_diff, v_gt_neg_diff, zero_weight)
             ones = torch.ones_like(v_gt_neg_diff)
             diffv_gt_pred = (v_gt_1d - v_pred_1d).pow(2)
             diffv_neg_pred = (v_neg_1d - v_pred_1d).pow(2)
+            per_tok_diff = (diffv_gt_pred - diffv_neg_pred).sum(dim=2)
+            triplet_loss = torch.mean(F.relu((per_tok_diff + margin) * v_weight))
+            ones = torch.ones_like(per_tok_diff)
+            zeros = torch.zeros_like(per_tok_diff)
+            loss_nonzero_nums = torch.sum(torch.where(((per_tok_diff + margin) * v_weight)>0, ones, zeros))
             wand_logger.log({
                 "loss_nonzero_nums": loss_nonzero_nums,
             }, commit=False)
             texam(v_weight, "v_weight")
             texam(diffv_gt_pred, "diffv_gt_pred")
             texam(diffv_neg_pred, "diffv_neg_pred")
+            texam(per_tok_diff, "per_tok_diff")
         if loss_accumulator.has_group("pixel"):
             x_0_pred = x_t_1d - t * v_pred_1d
+            with torch.no_grad():
+                pixel_values_x0_gt = self.latents_to_pil(x_0_1d, h=h_f16, w=w_f16, with_grad=True).detach()
             pixel_values_x0_pred = self.latents_to_pil(x_0_pred, h=h_f16, w=w_f16, with_grad=True)
             if loss_accumulator.has("pixel_lpips"):

qwenimage/training.py CHANGED Viewed

@@ -118,6 +118,7 @@ def run_training(config_path: Path | str, update_config_paths: list[Path] | None
     config = QwenConfig(
         **config,
     )
     # Data
     if config.training_type.is_style:

     config = QwenConfig(
         **config,
     )
+    config.add_suffix_to_names()
     # Data
     if config.training_type.is_style:

scripts/train_multi.sh CHANGED Viewed

@@ -1,38 +1,21 @@
 #!/bin/bash
-# nohup python scripts/train.py configs/base.yaml --where modal \
-#     --update configs/regression/base.yaml \
-#     --update configs/regression/modal.yaml \
-#     --update configs/regression/mse.yaml \
-#     --update configs/compare/5k_steps.yaml \
-#     > logs/mse.log 2>&1 &
-nohup python scripts/train.py configs/base.yaml --where modal \
-    --update configs/regression/base.yaml \
-    --update configs/regression/modal.yaml \
-    --update configs/regression/triplet/mse-triplet-b.yaml \
-    --update configs/compare/5k_steps.yaml \
-    > logs/mse-triplet-b.log 2>&1 &
 nohup python scripts/train.py configs/base.yaml --where modal \
     --update configs/regression/base.yaml \
     --update configs/regression/modal.yaml \
-    --update configs/regression/triplet/mse-triplet-c.yaml \
-    --update configs/compare/5k_steps.yaml \
-    > logs/mse-triplet-c.log 2>&1 &
 nohup python scripts/train.py configs/base.yaml --where modal \
     --update configs/regression/base.yaml \
     --update configs/regression/modal.yaml \
-    --update configs/regression/triplet/mse-triplet-d.yaml \
-    --update configs/compare/5k_steps.yaml \
-    > logs/mse-triplet-d.log 2>&1 &
 nohup python scripts/train.py configs/base.yaml --where modal \
     --update configs/regression/base.yaml \
     --update configs/regression/modal.yaml \
-    --update configs/regression/triplet/mse-triplet-e.yaml \
-    --update configs/compare/5k_steps.yaml \
-    > logs/mse-triplet-e.log 2>&1 &

 #!/bin/bash
 nohup python scripts/train.py configs/base.yaml --where modal \
     --update configs/regression/base.yaml \
     --update configs/regression/modal.yaml \
+    --update configs/regression/mse-pixel-lpips.yaml \
+    > logs/mse-pixel-lpips.log 2>&1 &
 nohup python scripts/train.py configs/base.yaml --where modal \
     --update configs/regression/base.yaml \
     --update configs/regression/modal.yaml \
+    --update configs/regression/mse-pixel-lpips.yaml \
+    --update configs/optim/accum-4.yaml \
+    > logs/mse-pixel-lpips-accum4.log 2>&1 &
 nohup python scripts/train.py configs/base.yaml --where modal \
     --update configs/regression/base.yaml \
     --update configs/regression/modal.yaml \
+    --update configs/regression/mse-pixel-lpips.yaml \
+    --update configs/optim/cosine.yaml \
+    > logs/mse-pixel-lpips-cosine.log 2>&1 &