Skip to content

Commit

Permalink
make hunyuan video work with --enable_model_cpu_offload
Browse files (browse the repository at this point in the history)
  • Loading branch information
chengzeyi committed Dec 20, 2024
1 parent 8bc8e8d commit 88102b9
Showing 1 changed file with 13 additions and 12 deletions.
25 changes: 13 additions & 12 deletions examples/hunyuan_video_usp_example.py
Original file line number | Diff line number | Diff line change
[Expand Up] @@ -207,6 +207,18 @@ def main():
revision="refs/pr/18",
)

initialize_runtime_state(pipe, engine_config)
get_runtime_state().set_video_input_parameters(
height=input_config.height,
width=input_config.width,
num_frames=input_config.num_frames,
batch_size=1,
num_inference_steps=input_config.num_inference_steps,
split_text_embed_in_sp=get_pipeline_parallel_world_size() == 1,
)

parallelize_transformer(pipe)

if args.enable_sequential_cpu_offload:
pipe.enable_sequential_cpu_offload(gpu_id=local_rank)
logging.info(f"rank {local_rank} sequential CPU offload enabled")
[Expand Down] [Expand Up] @@ -234,18 +246,6 @@ def main():
parameter_peak_memory = torch.cuda.max_memory_allocated(
device=f"cuda:{local_rank}")

initialize_runtime_state(pipe, engine_config)
get_runtime_state().set_video_input_parameters(
height=input_config.height,
width=input_config.width,
num_frames=input_config.num_frames,
batch_size=1,
num_inference_steps=input_config.num_inference_steps,
split_text_embed_in_sp=get_pipeline_parallel_world_size() == 1,
)

parallelize_transformer(pipe)

if engine_config.runtime_config.use_torch_compile:
torch._inductor.config.reorder_for_compute_comm_overlap = True
pipe.transformer = torch.compile(pipe.transformer,
[Expand Down] [Expand Up] @@ -299,5 +299,6 @@ def main():


# mkdir -p results && torchrun --nproc_per_node=2 examples/hunyuan_video_usp_example.py --model tencent/HunyuanVideo --ulysses_degree 2 --num_inference_steps 30 --warmup_steps 0 --prompt "A cat walks on the grass, realistic" --height 320 --width 512 --num_frames 61 --enable_tiling
# mkdir -p results && torchrun --nproc_per_node=2 examples/hunyuan_video_usp_example.py --model tencent/HunyuanVideo --ulysses_degree 2 --num_inference_steps 30 --warmup_steps 0 --prompt "A cat walks on the grass, realistic" --height 544 --width 960 --num_frames 129 --enable_tiling --enable_model_cpu_offload
if __name__ == "__main__":
main()

0 comments on commit 88102b9

Please sign in to comment.