From f91943e4a65458386afb38d85fd528c96d7e9770 Mon Sep 17 00:00:00 2001 From: chengzeyi Date: Fri, 20 Dec 2024 17:30:16 +0800 Subject: [PATCH] update hunyuanvideo performance on single L20 --- docs/performance/hunyuanvideo.md | 3 ++- examples/run_hunyuan_video_usp.sh | 43 +++++++++++++++++++++++++++++++ 2 files changed, 45 insertions(+), 1 deletion(-) create mode 100755 examples/run_hunyuan_video_usp.sh diff --git a/docs/performance/hunyuanvideo.md b/docs/performance/hunyuanvideo.md index 0832bba..e214923 100644 --- a/docs/performance/hunyuanvideo.md +++ b/docs/performance/hunyuanvideo.md @@ -10,6 +10,7 @@ xDiT is [HunyuanVideo](https://github.com/Tencent/HunyuanVideo?tab=readme-ov-fil |----------|--------|---------|---------|---------| | H100 | 1,904.08 | 925.04 | 514.08 | 337.58 | | H20 | 6,639.17 | 3,400.55 | 1,762.86 | 940.97 | +| L20 | 6,043.88 | | | | @@ -22,4 +23,4 @@ xDiT is [HunyuanVideo](https://github.com/Tencent/HunyuanVideo?tab=readme-ov-fil | H100 | 1,735.01 | 934.09 | 645.45 | 367.02 | | H20 | 6,621.46 | 3,400.55 | 2,310.48 | 1,214.67 | - \ No newline at end of file + diff --git a/examples/run_hunyuan_video_usp.sh b/examples/run_hunyuan_video_usp.sh new file mode 100755 index 0000000..c1f8813 --- /dev/null +++ b/examples/run_hunyuan_video_usp.sh @@ -0,0 +1,43 @@ +#!/bin/bash +set -x + +export PYTHONPATH=$PWD:$PYTHONPATH + +# HunyuanVideo configuration +SCRIPT="hunyuan_video_usp_example.py" +MODEL_ID="/cfs/dit/HunyuanVideo" +# MODEL_ID="tencent/HunyuanVideo" +INFERENCE_STEP=50 + +mkdir -p ./results + +# HunyuanVideo specific task args +TASK_ARGS="--height 720 --width 1280 --num_frames 129" + +# HunyuanVideo parallel configuration +N_GPUS=8 +PARALLEL_ARGS="--ulysses_degree 4 --ring_degree 2" +# CFG_ARGS="--use_cfg_parallel" + +# Uncomment and modify these as needed +# PIPEFUSION_ARGS="--num_pipeline_patch 8" +# OUTPUT_ARGS="--output_type latent" +# PARALLLEL_VAE="--use_parallel_vae" +ENABLE_TILING="--enable_tiling" 
+ENABLE_MODEL_CPU_OFFLOAD="--enable_model_cpu_offload" +# COMPILE_FLAG="--use_torch_compile" + +torchrun --nproc_per_node=$N_GPUS ./examples/$SCRIPT \ +--model $MODEL_ID \ +$PARALLEL_ARGS \ +$TASK_ARGS \ +$PIPEFUSION_ARGS \ +$OUTPUT_ARGS \ +--num_inference_steps $INFERENCE_STEP \ +--warmup_steps 0 \ +--prompt "A cat walks on the grass, realistic" \ +$CFG_ARGS \ +$PARALLLEL_VAE \ +$ENABLE_TILING \ +$ENABLE_MODEL_CPU_OFFLOAD \ +$COMPILE_FLAG