
Commit 8c661ea

enable lora cases on XPU (#11506)
* enable lora cases on XPU
* remove hunyuanvideo xpu expectation

Signed-off-by: Yao Matrix <matrix.yao@intel.com>
1 parent d7ffe60 commit 8c661ea

File tree: 5 files changed, +44 −34 lines
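The thrust of the change is mechanical: CUDA-only test utilities (`torch.cuda.empty_cache()`, `require_torch_gpu`, `require_big_gpu_with_torch_cuda`, hard-coded `.to("cuda")`) are swapped for device-agnostic counterparts (`backend_empty_cache`, `require_torch_accelerator`, `require_big_accelerator`, `.to(torch_device)`) so the LoRA suites can also run on Intel XPU. As a rough sketch of what a helper like `backend_empty_cache` might do — an illustration under assumed behavior, not the actual `diffusers.utils.testing_utils` implementation:

```python
import torch


def backend_empty_cache_sketch(device: str) -> None:
    """Free cached allocator memory on whichever accelerator backend is active."""
    if device == "cuda":
        torch.cuda.empty_cache()
    elif device == "xpu":
        # Intel GPU backend -- the target this commit enables.
        torch.xpu.empty_cache()
    elif device == "mps":
        # Apple Silicon.
        torch.mps.empty_cache()
    # "cpu" has no device-side cache, so there is nothing to do.
```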

tests/lora/test_lora_layers_flux.py

Lines changed: 15 additions & 14 deletions
```diff
@@ -31,13 +31,14 @@
 from diffusers.utils import load_image, logging
 from diffusers.utils.testing_utils import (
     CaptureLogger,
+    backend_empty_cache,
     floats_tensor,
     is_peft_available,
     nightly,
     numpy_cosine_similarity_distance,
-    require_big_gpu_with_torch_cuda,
+    require_big_accelerator,
     require_peft_backend,
-    require_torch_gpu,
+    require_torch_accelerator,
     slow,
     torch_device,
 )
@@ -809,10 +810,10 @@ def test_simple_inference_with_text_denoiser_multi_adapter_block_lora(self):
 
 @slow
 @nightly
-@require_torch_gpu
+@require_torch_accelerator
 @require_peft_backend
-@require_big_gpu_with_torch_cuda
-@pytest.mark.big_gpu_with_torch_cuda
+@require_big_accelerator
+@pytest.mark.big_accelerator
 class FluxLoRAIntegrationTests(unittest.TestCase):
     """internal note: The integration slices were obtained on audace.
 
@@ -827,7 +828,7 @@ def setUp(self):
         super().setUp()
 
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)
 
         self.pipeline = FluxPipeline.from_pretrained("black-forest-labs/FLUX.1-dev", torch_dtype=torch.bfloat16)
 
@@ -836,13 +837,13 @@ def tearDown(self):
 
         del self.pipeline
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)
 
     def test_flux_the_last_ben(self):
         self.pipeline.load_lora_weights("TheLastBen/Jon_Snow_Flux_LoRA", weight_name="jon_snow.safetensors")
         self.pipeline.fuse_lora()
         self.pipeline.unload_lora_weights()
-        # Instead of calling `enable_model_cpu_offload()`, we do a cuda placement here because the CI
+        # Instead of calling `enable_model_cpu_offload()`, we do an accelerator placement here because the CI
         # run supports it. We have about 34GB RAM in the CI runner which kills the test when run with
         # `enable_model_cpu_offload()`. We repeat this for the other tests, too.
         self.pipeline = self.pipeline.to(torch_device)
@@ -956,10 +957,10 @@ def test_flux_xlabs_load_lora_with_single_blocks(self):
 
 
 @nightly
-@require_torch_gpu
+@require_torch_accelerator
 @require_peft_backend
-@require_big_gpu_with_torch_cuda
-@pytest.mark.big_gpu_with_torch_cuda
+@require_big_accelerator
+@pytest.mark.big_accelerator
 class FluxControlLoRAIntegrationTests(unittest.TestCase):
     num_inference_steps = 10
     seed = 0
@@ -969,17 +970,17 @@ def setUp(self):
         super().setUp()
 
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)
 
         self.pipeline = FluxControlPipeline.from_pretrained(
             "black-forest-labs/FLUX.1-dev", torch_dtype=torch.bfloat16
-        ).to("cuda")
+        ).to(torch_device)
 
     def tearDown(self):
         super().tearDown()
 
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)
 
     @parameterized.expand(["black-forest-labs/FLUX.1-Canny-dev-lora", "black-forest-labs/FLUX.1-Depth-dev-lora"])
     def test_lora(self, lora_ckpt_id):
```
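The decorator swaps above all follow one pattern: gate tests on "any supported accelerator" rather than on CUDA specifically. A hedged sketch of how such a gate could be written with stock `unittest` — the real `require_torch_accelerator` in `diffusers.utils.testing_utils` may differ in detail:

```python
import unittest

import torch


def _accelerator_available() -> bool:
    # CUDA, Intel XPU, and Apple MPS all count as "an accelerator" here.
    if torch.cuda.is_available():
        return True
    if hasattr(torch, "xpu") and torch.xpu.is_available():
        return True
    return torch.backends.mps.is_available()


def require_torch_accelerator_sketch(test_case):
    """Decorator: skip the test unless some PyTorch accelerator is present."""
    return unittest.skipUnless(_accelerator_available(), "test requires an accelerator")(test_case)
```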

tests/lora/test_lora_layers_hunyuanvideo.py

Lines changed: 17 additions & 9 deletions
```diff
@@ -28,13 +28,16 @@
     HunyuanVideoTransformer3DModel,
 )
 from diffusers.utils.testing_utils import (
+    Expectations,
+    backend_empty_cache,
     floats_tensor,
     nightly,
     numpy_cosine_similarity_distance,
-    require_big_gpu_with_torch_cuda,
+    require_big_accelerator,
     require_peft_backend,
-    require_torch_gpu,
+    require_torch_accelerator,
     skip_mps,
+    torch_device,
 )
 
 
@@ -192,10 +195,10 @@ def test_simple_inference_with_text_lora_save_load(self):
 
 
 @nightly
-@require_torch_gpu
+@require_torch_accelerator
 @require_peft_backend
-@require_big_gpu_with_torch_cuda
-@pytest.mark.big_gpu_with_torch_cuda
+@require_big_accelerator
+@pytest.mark.big_accelerator
 class HunyuanVideoLoRAIntegrationTests(unittest.TestCase):
     """internal note: The integration slices were obtained on DGX.
 
@@ -210,21 +213,21 @@ def setUp(self):
         super().setUp()
 
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)
 
         model_id = "hunyuanvideo-community/HunyuanVideo"
         transformer = HunyuanVideoTransformer3DModel.from_pretrained(
             model_id, subfolder="transformer", torch_dtype=torch.bfloat16
         )
         self.pipeline = HunyuanVideoPipeline.from_pretrained(
             model_id, transformer=transformer, torch_dtype=torch.float16
-        ).to("cuda")
+        ).to(torch_device)
 
     def tearDown(self):
         super().tearDown()
 
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)
 
     def test_original_format_cseti(self):
         self.pipeline.load_lora_weights(
@@ -249,8 +252,13 @@ def test_original_format_cseti(self):
         out_slice = np.concatenate((out[:8], out[-8:]))
 
         # fmt: off
-        expected_slice = np.array([0.1013, 0.1924, 0.0078, 0.1021, 0.1929, 0.0078, 0.1023, 0.1919, 0.7402, 0.104, 0.4482, 0.7354, 0.0925, 0.4382, 0.7275, 0.0815])
+        expected_slices = Expectations(
+            {
+                ("cuda", 7): np.array([0.1013, 0.1924, 0.0078, 0.1021, 0.1929, 0.0078, 0.1023, 0.1919, 0.7402, 0.104, 0.4482, 0.7354, 0.0925, 0.4382, 0.7275, 0.0815]),
+            }
+        )
         # fmt: on
+        expected_slice = expected_slices.get_expectation()
 
         max_diff = numpy_cosine_similarity_distance(expected_slice.flatten(), out_slice)
 
```
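The `Expectations` change above replaces a single hard-coded CUDA slice with a mapping keyed by device, so backend-specific references can be added later (the XPU entry was deliberately removed in this commit, per the commit message). A minimal stand-in for the idea, assuming a `(device_type, compute_major)` key with a fallback — not the actual diffusers class:

```python
import numpy as np
import torch


class ExpectationsSketch:
    """Map (device_type, compute_major) keys to expected output slices."""

    def __init__(self, data):
        self._data = data

    def get_expectation(self):
        device = "cuda" if torch.cuda.is_available() else "cpu"
        major = torch.cuda.get_device_capability()[0] if device == "cuda" else None
        # Fall back to any stored slice when there is no exact match, e.g. on a
        # backend such as XPU for which no expectation has been recorded yet.
        return self._data.get((device, major), next(iter(self._data.values())))


# Usage mirroring the test above (values truncated for illustration):
expected_slices = ExpectationsSketch({("cuda", 7): np.array([0.1013, 0.1924, 0.0078])})
expected_slice = expected_slices.get_expectation()
```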

tests/lora/test_lora_layers_sd.py

Lines changed: 2 additions & 2 deletions
```diff
@@ -93,12 +93,12 @@ def output_shape(self):
     def setUp(self):
         super().setUp()
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)
 
     def tearDown(self):
         super().tearDown()
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)
 
     # Keeping this test here makes sense because it doesn't look any integration
     # (value assertions on logits).
```

tests/lora/test_lora_layers_sd3.py

Lines changed: 3 additions & 3 deletions
```diff
@@ -34,7 +34,7 @@
     is_flaky,
     nightly,
     numpy_cosine_similarity_distance,
-    require_big_gpu_with_torch_cuda,
+    require_big_accelerator,
     require_peft_backend,
     require_torch_accelerator,
     torch_device,
@@ -138,8 +138,8 @@ def test_multiple_wrong_adapter_name_raises_error(self):
 @nightly
 @require_torch_accelerator
 @require_peft_backend
-@require_big_gpu_with_torch_cuda
-@pytest.mark.big_gpu_with_torch_cuda
+@require_big_accelerator
+@pytest.mark.big_accelerator
 class SD3LoraIntegrationTests(unittest.TestCase):
     pipeline_class = StableDiffusion3Img2ImgPipeline
     repo_id = "stabilityai/stable-diffusion-3-medium-diffusers"
```

tests/lora/test_lora_layers_sdxl.py

Lines changed: 7 additions & 6 deletions
```diff
@@ -37,12 +37,13 @@
 from diffusers.utils.import_utils import is_accelerate_available
 from diffusers.utils.testing_utils import (
     CaptureLogger,
+    backend_empty_cache,
     is_flaky,
     load_image,
     nightly,
     numpy_cosine_similarity_distance,
     require_peft_backend,
-    require_torch_gpu,
+    require_torch_accelerator,
     slow,
     torch_device,
 )
@@ -105,12 +106,12 @@ def output_shape(self):
     def setUp(self):
         super().setUp()
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)
 
     def tearDown(self):
         super().tearDown()
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)
 
     @is_flaky
     def test_multiple_wrong_adapter_name_raises_error(self):
@@ -119,18 +120,18 @@ def test_multiple_wrong_adapter_name_raises_error(self):
 
 @slow
 @nightly
-@require_torch_gpu
+@require_torch_accelerator
 @require_peft_backend
 class LoraSDXLIntegrationTests(unittest.TestCase):
     def setUp(self):
         super().setUp()
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)
 
     def tearDown(self):
         super().tearDown()
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)
 
     def test_sdxl_1_0_lora(self):
         generator = torch.Generator("cpu").manual_seed(0)
```
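All of the `.to(torch_device)` and `backend_empty_cache(torch_device)` call sites rely on a single module-level device string. One plausible way such a constant could be resolved at import time — shown as an assumption, not the actual `diffusers.utils.testing_utils` logic:

```python
import torch

# Pick the first available accelerator backend, falling back to CPU.
if torch.cuda.is_available():
    torch_device_sketch = "cuda"
elif hasattr(torch, "xpu") and torch.xpu.is_available():
    torch_device_sketch = "xpu"  # the backend this commit targets
elif torch.backends.mps.is_available():
    torch_device_sketch = "mps"
else:
    torch_device_sketch = "cpu"

# With this in place, `pipeline.to(torch_device)` and
# `backend_empty_cache(torch_device)` work unchanged across backends.
```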
