Mirror of https://github.com/huggingface/diffusers.git
Reduce peak VRAM by releasing large attention tensors (as soon as they're unnecessary) (#3463)
Release large tensors in attention as soon as they're no longer required. Reduces peak VRAM by nearly 2 GB for 1024x1024 generation (even with attention slicing enabled), and the savings scale up with image size.
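A minimal sketch of the effect being exploited (illustrative names, assumes a CUDA device; not code from this PR): `del` drops the last Python reference to a large intermediate, so PyTorch's caching allocator can reuse that block for the next allocation instead of keeping both alive.

import torch

def softmax_peak(release_early: bool) -> int:
    # Measure peak VRAM for a softmax-then-cast sequence like the one in this hunk.
    torch.cuda.empty_cache()
    torch.cuda.reset_peak_memory_stats()
    scores = torch.randn(8, 4096, 4096, device="cuda")  # ~0.5 GiB in fp32
    probs = scores.softmax(dim=-1)                      # second ~0.5 GiB buffer
    if release_early:
        del scores                                      # free the input before the cast below
    probs = probs.to(torch.float16)                     # third buffer, ~0.25 GiB
    return torch.cuda.max_memory_allocated()

for flag in (False, True):
    print(f"release_early={flag}: {softmax_peak(flag) / 2**30:.2f} GiB peak")

With release_early=True the peak is set by the softmax step alone (input plus output, ~1.0 GiB); without it, all three buffers overlap during the cast (~1.25 GiB).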
@@ -344,11 +344,14 @@ class Attention(nn.Module):
             beta=beta,
             alpha=self.scale,
         )
+        del baddbmm_input
 
         if self.upcast_softmax:
             attention_scores = attention_scores.float()
 
         attention_probs = attention_scores.softmax(dim=-1)
+        del attention_scores
+
         attention_probs = attention_probs.to(dtype)
 
         return attention_probs
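For context, a hedged reconstruction of the method this hunk modifies (Attention.get_attention_scores in diffusers), flattened into a free function with explicit parameters so it runs standalone; the baddbmm setup above the hunk is assumed from the surrounding upstream file and may differ in detail.

import torch

def get_attention_scores(query, key, scale, attention_mask=None, upcast_softmax=True):
    dtype = query.dtype

    if attention_mask is None:
        # Scratch buffer for baddbmm; with beta=0 its contents are never read.
        baddbmm_input = torch.empty(
            query.shape[0], query.shape[1], key.shape[1], dtype=query.dtype, device=query.device
        )
        beta = 0
    else:
        baddbmm_input = attention_mask
        beta = 1

    # scores = beta * baddbmm_input + scale * (query @ key^T), batched over dim 0
    attention_scores = torch.baddbmm(
        baddbmm_input,
        query,
        key.transpose(-1, -2),
        beta=beta,
        alpha=scale,
    )
    del baddbmm_input  # the (batch, q_len, kv_len) scratch buffer is no longer needed

    if upcast_softmax:
        attention_scores = attention_scores.float()  # softmax in fp32 for stability

    attention_probs = attention_scores.softmax(dim=-1)
    del attention_scores  # softmax returned a new tensor; free its input before the cast

    attention_probs = attention_probs.to(dtype)

    return attention_probs

Both del statements only drop Python references; since nothing else aliases these tensors in this path, the caching allocator reclaims the blocks immediately, which is where the reported ~2 GB peak reduction at 1024x1024 comes from.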