@@ -40,22 +40,25 @@ The quantized HunyuanVideo model below requires ~14GB of VRAM.
 ```py
 import torch
-from diffusers import BitsAndBytesConfig as DiffusersBitsAndBytesConfig, AutoModel, HunyuanVideoPipeline
+from diffusers import AutoModel, HunyuanVideoPipeline
+from diffusers.quantizers import PipelineQuantizationConfig
 from diffusers.utils import export_to_video
 
 # quantize weights to int4 with bitsandbytes
-quant_config = DiffusersBitsAndBytesConfig(load_in_4bit=True)
-transformer = AutoModel.from_pretrained(
-    "hunyuanvideo-community/HunyuanVideo",
-    subfolder="transformer",
-    quantization_config=quant_config,
-    torch_dtype=torch.bfloat16,
+pipeline_quant_config = PipelineQuantizationConfig(
+    quant_backend="bitsandbytes_4bit",
+    quant_kwargs={
+        "load_in_4bit": True,
+        "bnb_4bit_quant_type": "nf4",
+        "bnb_4bit_compute_dtype": torch.bfloat16
+    },
+    components_to_quantize=["transformer"]
 )
 
 pipeline = HunyuanVideoPipeline.from_pretrained(
     "hunyuanvideo-community/HunyuanVideo",
-    transformer=transformer,
-    torch_dtype=torch.float16,
+    quantization_config=pipeline_quant_config,
+    torch_dtype=torch.bfloat16,
 )
 
 # model-offloading and tiling
@@ -74,22 +77,25 @@ Compilation is slow the first time but subsequent calls to the pipeline are fast
 ```py
 import torch
-from diffusers import BitsAndBytesConfig as DiffusersBitsAndBytesConfig, AutoModel, HunyuanVideoPipeline
+from diffusers import AutoModel, HunyuanVideoPipeline
+from diffusers.quantizers import PipelineQuantizationConfig
 from diffusers.utils import export_to_video
 
 # quantize weights to int4 with bitsandbytes
-quant_config = DiffusersBitsAndBytesConfig(load_in_4bit=True)
-transformer = AutoModel.from_pretrained(
-    "hunyuanvideo-community/HunyuanVideo",
-    subfolder="transformer",
-    quantization_config=quant_config,
-    torch_dtype=torch.bfloat16,
+pipeline_quant_config = PipelineQuantizationConfig(
+    quant_backend="bitsandbytes_4bit",
+    quant_kwargs={
+        "load_in_4bit": True,
+        "bnb_4bit_quant_type": "nf4",
+        "bnb_4bit_compute_dtype": torch.bfloat16
+    },
+    components_to_quantize=["transformer"]
 )
 
 pipeline = HunyuanVideoPipeline.from_pretrained(
     "hunyuanvideo-community/HunyuanVideo",
-    transformer=transformer,
-    torch_dtype=torch.float16,
+    quantization_config=pipeline_quant_config,
+    torch_dtype=torch.bfloat16,
 )
 
 # model-offloading and tiling
@@ -116,22 +122,25 @@ export_to_video(video, "output.mp4", fps=15)
 ```py
 import torch
-from diffusers import BitsAndBytesConfig as DiffusersBitsAndBytesConfig, AutoModel, HunyuanVideoPipeline
+from diffusers import AutoModel, HunyuanVideoPipeline
+from diffusers.quantizers import PipelineQuantizationConfig
 from diffusers.utils import export_to_video
 
 # quantize weights to int4 with bitsandbytes
-quant_config = DiffusersBitsAndBytesConfig(load_in_4bit=True)
-transformer = AutoModel.from_pretrained(
-    "hunyuanvideo-community/HunyuanVideo",
-    subfolder="transformer",
-    quantization_config=quant_config,
-    torch_dtype=torch.bfloat16,
+pipeline_quant_config = PipelineQuantizationConfig(
+    quant_backend="bitsandbytes_4bit",
+    quant_kwargs={
+        "load_in_4bit": True,
+        "bnb_4bit_quant_type": "nf4",
+        "bnb_4bit_compute_dtype": torch.bfloat16
+    },
+    components_to_quantize=["transformer"]
 )
 
 pipeline = HunyuanVideoPipeline.from_pretrained(
     "hunyuanvideo-community/HunyuanVideo",
-    transformer=transformer,
-    torch_dtype=torch.float16,
+    quantization_config=pipeline_quant_config,
+    torch_dtype=torch.bfloat16,
 )
 
 # load LoRA weights
0 commit comments