a few fixes for the kandinsky combined pipelines (#4352)
* add xformer

* enable_sequential_cpu_offload

* style

* Update src/diffusers/pipelines/kandinsky/pipeline_kandinsky_combined.py

Co-authored-by: Steven Liu <59462357+stevhliu@users.noreply.github.com>

---------

Co-authored-by: yiyixuxu <yixu310@gmail,com>
Co-authored-by: Steven Liu <59462357+stevhliu@users.noreply.github.com>
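For context, a minimal usage sketch of the two methods this commit adds to the combined pipeline; the checkpoint name and dtype are illustrative assumptions, not taken from the diff:

import torch
from diffusers import KandinskyCombinedPipeline

# Illustrative checkpoint; any Kandinsky 2.1 combined checkpoint should behave the same.
pipe = KandinskyCombinedPipeline.from_pretrained(
    "kandinsky-community/kandinsky-2-1", torch_dtype=torch.float16
)

# New in this commit: sequential offload fans out to both wrapped pipes,
# while the xformers toggle is forwarded to the decoder pipe.
pipe.enable_xformers_memory_efficient_attention()
pipe.enable_sequential_cpu_offload()

image = pipe(prompt="a portrait of a corgi astronaut").images[0]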
src/diffusers/pipelines/kandinsky/pipeline_kandinsky_combined.py

@@ -188,6 +188,9 @@ class KandinskyCombinedPipeline(DiffusionPipeline):
             movq=movq,
         )
 
+    def enable_xformers_memory_efficient_attention(self, attention_op: Optional[Callable] = None):
+        self.decoder_pipe.enable_xformers_memory_efficient_attention(attention_op)
+
     def enable_model_cpu_offload(self, gpu_id=0):
         r"""
         Offloads all models to CPU using accelerate, reducing memory usage with a low impact on performance. Compared
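Since the new method forwards `attention_op` verbatim to the decoder pipe (the prior pipe is left untouched), an explicit xformers operator can be passed through. A hedged sketch, assuming xformers is installed and the pipeline runs in fp16:

from xformers.ops import MemoryEfficientAttentionFlashAttentionOp

# Forwarded as-is to decoder_pipe.enable_xformers_memory_efficient_attention.
pipe.enable_xformers_memory_efficient_attention(
    attention_op=MemoryEfficientAttentionFlashAttentionOp
)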
@@ -198,6 +201,16 @@ class KandinskyCombinedPipeline(DiffusionPipeline):
         self.prior_pipe.enable_model_cpu_offload()
         self.decoder_pipe.enable_model_cpu_offload()
 
+    def enable_sequential_cpu_offload(self, gpu_id=0):
+        r"""
+        Offloads all models (`unet`, `text_encoder`, `vae`, and `safety checker` state dicts) to CPU using 🤗
+        Accelerate, significantly reducing memory usage. Models are moved to a `torch.device('meta')` and loaded on a
+        GPU only when their specific submodule's `forward` method is called. Offloading happens on a submodule basis.
+        Memory savings are higher than using `enable_model_cpu_offload`, but performance is lower.
+        """
+        self.prior_pipe.enable_sequential_cpu_offload(gpu_id=gpu_id)
+        self.decoder_pipe.enable_sequential_cpu_offload(gpu_id=gpu_id)
+
     def progress_bar(self, iterable=None, total=None):
         self.prior_pipe.progress_bar(iterable=iterable, total=total)
         self.decoder_pipe.progress_bar(iterable=iterable, total=total)
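The combined pipeline now exposes both offload flavors, so the usual trade-off applies here too. A sketch of the choice (pick one, not both):

# Option 1: whole-model offload. Each sub-model moves to the GPU as a unit
# when used; modest memory savings, small speed penalty.
pipe.enable_model_cpu_offload()

# Option 2: sequential offload. Weights sit on torch.device("meta") and are
# loaded per submodule forward call; smallest GPU footprint, but slower.
# pipe.enable_sequential_cpu_offload()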
@@ -398,6 +411,9 @@ class KandinskyImg2ImgCombinedPipeline(DiffusionPipeline):
             movq=movq,
         )
 
+    def enable_xformers_memory_efficient_attention(self, attention_op: Optional[Callable] = None):
+        self.decoder_pipe.enable_xformers_memory_efficient_attention(attention_op)
+
     def enable_model_cpu_offload(self, gpu_id=0):
         r"""
         Offloads all models to CPU using accelerate, reducing memory usage with a low impact on performance. Compared
@@ -408,6 +424,17 @@ class KandinskyImg2ImgCombinedPipeline(DiffusionPipeline):
         self.prior_pipe.enable_model_cpu_offload()
         self.decoder_pipe.enable_model_cpu_offload()
 
+    def enable_sequential_cpu_offload(self, gpu_id=0):
+        r"""
+        Offloads all models to CPU using accelerate, significantly reducing memory usage. When called, unet,
+        text_encoder, vae and safety checker have their state dicts saved to CPU and then are moved to a
+        `torch.device('meta')` and loaded to GPU only when their specific submodule has its `forward` method called.
+        Note that offloading happens on a submodule basis. Memory savings are higher than with
+        `enable_model_cpu_offload`, but performance is lower.
+        """
+        self.prior_pipe.enable_sequential_cpu_offload(gpu_id=gpu_id)
+        self.decoder_pipe.enable_sequential_cpu_offload(gpu_id=gpu_id)
+
     def progress_bar(self, iterable=None, total=None):
         self.prior_pipe.progress_bar(iterable=iterable, total=total)
         self.decoder_pipe.progress_bar(iterable=iterable, total=total)
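The img2img combined pipeline gains the identical pair of methods; a brief sketch of them in that setting (checkpoint, image URL, and `strength` value are illustrative assumptions):

import torch
from diffusers import KandinskyImg2ImgCombinedPipeline
from diffusers.utils import load_image

pipe = KandinskyImg2ImgCombinedPipeline.from_pretrained(
    "kandinsky-community/kandinsky-2-1", torch_dtype=torch.float16
)
pipe.enable_sequential_cpu_offload()  # offloads prior and decoder pipes alike

init_image = load_image("https://example.com/sketch.png")  # placeholder URL
image = pipe(prompt="a watercolor fox", image=init_image, strength=0.4).images[0]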
@@ -630,6 +657,9 @@ class KandinskyInpaintCombinedPipeline(DiffusionPipeline):
             movq=movq,
         )
 
+    def enable_xformers_memory_efficient_attention(self, attention_op: Optional[Callable] = None):
+        self.decoder_pipe.enable_xformers_memory_efficient_attention(attention_op)
+
     def enable_model_cpu_offload(self, gpu_id=0):
         r"""
         Offloads all models to CPU using accelerate, reducing memory usage with a low impact on performance. Compared
@@ -640,6 +670,17 @@ class KandinskyInpaintCombinedPipeline(DiffusionPipeline):
         self.prior_pipe.enable_model_cpu_offload()
         self.decoder_pipe.enable_model_cpu_offload()
 
+    def enable_sequential_cpu_offload(self, gpu_id=0):
+        r"""
+        Offloads all models to CPU using accelerate, significantly reducing memory usage. When called, unet,
+        text_encoder, vae and safety checker have their state dicts saved to CPU and then are moved to a
+        `torch.device('meta')` and loaded to GPU only when their specific submodule has its `forward` method called.
+        Note that offloading happens on a submodule basis. Memory savings are higher than with
+        `enable_model_cpu_offload`, but performance is lower.
+        """
+        self.prior_pipe.enable_sequential_cpu_offload(gpu_id=gpu_id)
+        self.decoder_pipe.enable_sequential_cpu_offload(gpu_id=gpu_id)
+
     def progress_bar(self, iterable=None, total=None):
         self.prior_pipe.progress_bar(iterable=iterable, total=total)
         self.decoder_pipe.progress_bar(iterable=iterable, total=total)
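Likewise for the inpaint combined pipeline; a minimal sketch (the checkpoint name and the blank PIL inputs are placeholders, and the mask convention is the one documented for the pipeline, not asserted here):

import torch
from PIL import Image
from diffusers import KandinskyInpaintCombinedPipeline

pipe = KandinskyInpaintCombinedPipeline.from_pretrained(
    "kandinsky-community/kandinsky-2-1-inpaint", torch_dtype=torch.float16
)
pipe.enable_xformers_memory_efficient_attention()
pipe.enable_sequential_cpu_offload()

# Placeholder inputs; mask_image marks the region to repaint (see the
# pipeline docs for the exact convention).
image = Image.new("RGB", (512, 512))
mask = Image.new("L", (512, 512), 0)
result = pipe(prompt="a red scarf", image=image, mask_image=mask).images[0]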
src/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py

@@ -177,6 +177,9 @@ class KandinskyV22CombinedPipeline(DiffusionPipeline):
             movq=movq,
         )
 
+    def enable_xformers_memory_efficient_attention(self, attention_op: Optional[Callable] = None):
+        self.decoder_pipe.enable_xformers_memory_efficient_attention(attention_op)
+
     def enable_model_cpu_offload(self, gpu_id=0):
         r"""
         Offloads all models to CPU using accelerate, reducing memory usage with a low impact on performance. Compared
@@ -187,6 +190,17 @@ class KandinskyV22CombinedPipeline(DiffusionPipeline):
         self.prior_pipe.enable_model_cpu_offload()
         self.decoder_pipe.enable_model_cpu_offload()
 
+    def enable_sequential_cpu_offload(self, gpu_id=0):
+        r"""
+        Offloads all models to CPU using accelerate, significantly reducing memory usage. When called, unet,
+        text_encoder, vae and safety checker have their state dicts saved to CPU and then are moved to a
+        `torch.device('meta')` and loaded to GPU only when their specific submodule has its `forward` method called.
+        Note that offloading happens on a submodule basis. Memory savings are higher than with
+        `enable_model_cpu_offload`, but performance is lower.
+        """
+        self.prior_pipe.enable_sequential_cpu_offload(gpu_id=gpu_id)
+        self.decoder_pipe.enable_sequential_cpu_offload(gpu_id=gpu_id)
+
     def progress_bar(self, iterable=None, total=None):
         self.prior_pipe.progress_bar(iterable=iterable, total=total)
         self.decoder_pipe.progress_bar(iterable=iterable, total=total)
@@ -378,6 +392,9 @@ class KandinskyV22Img2ImgCombinedPipeline(DiffusionPipeline):
             movq=movq,
         )
 
+    def enable_xformers_memory_efficient_attention(self, attention_op: Optional[Callable] = None):
+        self.decoder_pipe.enable_xformers_memory_efficient_attention(attention_op)
+
     def enable_model_cpu_offload(self, gpu_id=0):
         r"""
         Offloads all models to CPU using accelerate, reducing memory usage with a low impact on performance. Compared
@@ -388,6 +405,17 @@ class KandinskyV22Img2ImgCombinedPipeline(DiffusionPipeline):
         self.prior_pipe.enable_model_cpu_offload()
         self.decoder_pipe.enable_model_cpu_offload()
 
+    def enable_sequential_cpu_offload(self, gpu_id=0):
+        r"""
+        Offloads all models to CPU using accelerate, significantly reducing memory usage. When called, unet,
+        text_encoder, vae and safety checker have their state dicts saved to CPU and then are moved to a
+        `torch.device('meta')` and loaded to GPU only when their specific submodule has its `forward` method called.
+        Note that offloading happens on a submodule basis. Memory savings are higher than with
+        `enable_model_cpu_offload`, but performance is lower.
+        """
+        self.prior_pipe.enable_sequential_cpu_offload(gpu_id=gpu_id)
+        self.decoder_pipe.enable_sequential_cpu_offload(gpu_id=gpu_id)
+
     def progress_bar(self, iterable=None, total=None):
         self.prior_pipe.progress_bar(iterable=iterable, total=total)
         self.decoder_pipe.progress_bar(iterable=iterable, total=total)
@@ -601,6 +629,9 @@ class KandinskyV22InpaintCombinedPipeline(DiffusionPipeline):
             movq=movq,
         )
 
+    def enable_xformers_memory_efficient_attention(self, attention_op: Optional[Callable] = None):
+        self.decoder_pipe.enable_xformers_memory_efficient_attention(attention_op)
+
     def enable_model_cpu_offload(self, gpu_id=0):
         r"""
         Offloads all models to CPU using accelerate, reducing memory usage with a low impact on performance. Compared
@@ -611,6 +642,17 @@ class KandinskyV22InpaintCombinedPipeline(DiffusionPipeline):
         self.prior_pipe.enable_model_cpu_offload()
         self.decoder_pipe.enable_model_cpu_offload()
 
+    def enable_sequential_cpu_offload(self, gpu_id=0):
+        r"""
+        Offloads all models to CPU using accelerate, significantly reducing memory usage. When called, unet,
+        text_encoder, vae and safety checker have their state dicts saved to CPU and then are moved to a
+        `torch.device('meta')` and loaded to GPU only when their specific submodule has its `forward` method called.
+        Note that offloading happens on a submodule basis. Memory savings are higher than with
+        `enable_model_cpu_offload`, but performance is lower.
+        """
+        self.prior_pipe.enable_sequential_cpu_offload(gpu_id=gpu_id)
+        self.decoder_pipe.enable_sequential_cpu_offload(gpu_id=gpu_id)
+
     def progress_bar(self, iterable=None, total=None):
         self.prior_pipe.progress_bar(iterable=iterable, total=total)
         self.decoder_pipe.progress_bar(iterable=iterable, total=total)
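The Kandinsky 2.2 combined pipelines receive the same additions verbatim; only the loading point differs. A sketch, assuming the 2.2 decoder checkpoint:

import torch
from diffusers import KandinskyV22CombinedPipeline

pipe = KandinskyV22CombinedPipeline.from_pretrained(
    "kandinsky-community/kandinsky-2-2-decoder", torch_dtype=torch.float16
)
pipe.enable_xformers_memory_efficient_attention()
pipe.enable_sequential_cpu_offload()
image = pipe(prompt="an isometric castle, 4k").images[0]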
@@ -51,7 +51,7 @@ class KandinskyPipelineCombinedFastTests(PipelineTesterMixin, unittest.TestCase):
         "output_type",
         "return_dict",
     ]
-    test_xformers_attention = False
+    test_xformers_attention = True
 
     def get_dummy_components(self):
         dummy = Dummies()
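Flipping `test_xformers_attention` to `True` opts these fast tests into the shared xformers check in `PipelineTesterMixin`, which is typically skipped when xformers or a CUDA device is unavailable. The same guard is useful in user code; a hedged sketch:

from diffusers.utils import is_xformers_available

if is_xformers_available():
    pipe.enable_xformers_memory_efficient_attention()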
@@ -55,7 +55,7 @@ class KandinskyV22PipelineCombinedFastTests(PipelineTesterMixin, unittest.TestCase):
         "output_type",
         "return_dict",
     ]
-    test_xformers_attention = False
+    test_xformers_attention = True
 
     def get_dummy_components(self):
         dummy = Dummies()