-
Notifications
You must be signed in to change notification settings - Fork 398
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
First implementation of the paper "Low-rank Adaptation for Fast Text-to-Image Diffusion Fine-tuning" for riffusion. Still needs to be integrated a lot more. Reference: https://github.com/cloneofsimo/lora Topic: lora_1 Relative: magic_mix
- Loading branch information
Showing
10 changed files
with
1,134 additions
and
4 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -18,3 +18,4 @@ torch | |
torchaudio | ||
torchvision | ||
transformers | ||
git+https://github.com/cloneofsimo/lora.git |
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
# Launches riffusion/external/lora/train_lora_dreambooth.py through
# `accelerate launch` (1 machine, 8 processes, fp16 mixed precision).
#
# The three exported variables select the base model, the directory of
# training images, and where the training output is written.
export MODEL_NAME="riffusion/riffusion-model-v1"
export INSTANCE_DIR="/tmp/sample_clips_tdlcqdfi/images"
export OUTPUT_DIR="/home/ubuntu/lora_dreambooth_waterfalls_2k"

# Every expansion is double-quoted so a path containing spaces or glob
# characters is passed to the trainer as a single argument. Each continuation
# backslash is preceded by a space so adjacent words can never be fused.
accelerate launch \
  --num_machines 1 \
  --num_processes 8 \
  --dynamo_backend=no \
  --mixed_precision="fp16" \
  riffusion/external/lora/train_lora_dreambooth.py \
  --pretrained_model_name_or_path="$MODEL_NAME" \
  --instance_data_dir="$INSTANCE_DIR" \
  --output_dir="$OUTPUT_DIR" \
  --instance_prompt="style of sks" \
  --resolution=512 \
  --train_batch_size=1 \
  --gradient_accumulation_steps=1 \
  --learning_rate=1e-4 \
  --lr_scheduler="constant" \
  --lr_warmup_steps=0 \
  --max_train_steps=2000

# NOTE: the earlier TODOs (try mixed_precision=fp16, try num_processes = 8)
# are both already applied in the command above.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
"""Run ``train`` from ``lora_diffusion.cli_lora_pti`` on the riffusion model.

When executed as a script this module first overwrites every entry of
``lora_diffusion.dataset.STYLE_TEMPLATE`` with an audio-oriented prompt and
then starts training with the settings listed below.
"""
from lora_diffusion.cli_lora_pti import train
from lora_diffusion.dataset import STYLE_TEMPLATE

MODEL_NAME = "riffusion/riffusion-model-v1"
INSTANCE_DIR = "/tmp/sample_clips_xzv8p57g/images"
OUTPUT_DIR = "./lora_output_acoustic"

if __name__ == "__main__":
    # Prompt templates used in place of the library's own style prompts.
    audio_prompts = (
        "music in the style of {}",
        "sound in the style of {}",
        "vibe in the style of {}",
        "audio in the style of {}",
        "groove in the style of {}",
    )

    # Replace the entries one by one (cycling through the prompts) so the list
    # keeps its length and identity; presumably the library reads this same
    # list object during training -- confirm against lora_diffusion.dataset.
    for slot, _ in enumerate(STYLE_TEMPLATE):
        STYLE_TEMPLATE[slot] = audio_prompts[slot % len(audio_prompts)]
    print(STYLE_TEMPLATE)

    # Keyword arguments forwarded unchanged to ``train``.
    settings = {
        "pretrained_model_name_or_path": MODEL_NAME,
        "instance_data_dir": INSTANCE_DIR,
        "output_dir": OUTPUT_DIR,
        "train_text_encoder": True,
        "resolution": 512,
        "train_batch_size": 1,
        "gradient_accumulation_steps": 4,
        "scale_lr": True,
        "learning_rate_unet": 1e-4,
        "learning_rate_text": 1e-5,
        "learning_rate_ti": 5e-4,
        "color_jitter": False,
        "lr_scheduler": "linear",
        "lr_warmup_steps": 0,
        "placeholder_tokens": "<s1>|<s2>",
        "use_template": "style",
        "save_steps": 100,
        "max_train_steps_ti": 1000,
        "max_train_steps_tuning": 1000,
        "perform_inversion": True,
        "clip_ti_decay": True,
        "weight_decay_ti": 0.000,
        "weight_decay_lora": 0.001,
        "continue_inversion": True,
        "continue_inversion_lr": 1e-4,
        "device": "cuda:0",
        "lora_rank": 1,
        "use_face_segmentation_condition": False,
    }
    train(**settings)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
# Runs the `lora_pti` command-line tool on the riffusion model, reading
# training images from INSTANCE_DIR and writing results to OUTPUT_DIR.
#
# IMPORTANT: keep comments OUT of the backslash-continued command below.
# A `#` line inside the continuation ends the command at that point, and the
# shell then tries to execute each following `--flag` line as its own command.
# The notes that used to sit between the flags are collected here instead:
#
#   --train_batch_size=4      started as 1
#   --lora_rank=4             1 or 4?
#   after --placeholder_tokens: initializer tokens and class prompt are not set
#
# Flags that are intentionally disabled (add them to the command to enable):
#   --color_jitter
#   --use_template="style"
#   --use_face_segmentation_condition
export MODEL_NAME="riffusion/riffusion-model-v1"
export INSTANCE_DIR="/tmp/sample_clips_xzv8p57g/images"
export OUTPUT_DIR="./lora_output_acoustic"

# Expansions are double-quoted so paths with spaces stay a single argument;
# every continuation backslash is preceded by a space.
lora_pti \
  --pretrained_model_name_or_path="$MODEL_NAME" \
  --instance_data_dir="$INSTANCE_DIR" \
  --output_dir="$OUTPUT_DIR" \
  --train_text_encoder \
  --resolution=512 \
  --train_batch_size=4 \
  --gradient_accumulation_steps=4 \
  --scale_lr \
  --learning_rate_unet=1e-4 \
  --learning_rate_text=1e-5 \
  --learning_rate_ti=5e-4 \
  --lr_scheduler="linear" \
  --lr_warmup_steps=0 \
  --placeholder_tokens="<s>" \
  --save_steps=100 \
  --max_train_steps_ti=1000 \
  --max_train_steps_tuning=1000 \
  --perform_inversion=True \
  --clip_ti_decay \
  --weight_decay_ti=0.000 \
  --weight_decay_lora=0.001 \
  --continue_inversion \
  --continue_inversion_lr=1e-4 \
  --device="cuda:0" \
  --lora_rank=4
Oops, something went wrong.