From bd78f63a54e439a46f162f191618e3ba554aeef6 Mon Sep 17 00:00:00 2001
From: cmdr2
Date: Wed, 17 May 2023 15:54:59 +0530
Subject: [PATCH] Reduce peak VRAM by releasing large attention tensors (as
 soon as they're unnecessary) (#3463)

Release large tensors in attention (as soon as they're no longer required).
Reduces peak VRAM by nearly 2 GB for 1024x1024 (even after slicing), and the
savings scale up with image size.
---
 src/diffusers/models/attention_processor.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/diffusers/models/attention_processor.py b/src/diffusers/models/attention_processor.py
index f88400da03..a489814c47 100644
--- a/src/diffusers/models/attention_processor.py
+++ b/src/diffusers/models/attention_processor.py
@@ -344,11 +344,14 @@ class Attention(nn.Module):
             beta=beta,
             alpha=self.scale,
         )
+        del baddbmm_input
 
         if self.upcast_softmax:
             attention_scores = attention_scores.float()
 
         attention_probs = attention_scores.softmax(dim=-1)
+        del attention_scores
+
         attention_probs = attention_probs.to(dtype)
 
         return attention_probs
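
Note (not part of the patch): the sketch below is a minimal, standalone illustration of the pattern the patch applies, i.e. `del`-ing large intermediate tensors as soon as they are consumed so the CUDA caching allocator can reuse their memory before the next large allocation. The `attention_probs` helper and the tensor shapes are hypothetical; only `torch.baddbmm`, `softmax`, and the `torch.cuda` memory-stat calls are real PyTorch APIs.

```python
import torch


def attention_probs(query, key, scale, upcast_softmax=True):
    """Compute softmax attention probabilities, releasing intermediates early."""
    dtype = query.dtype

    # Empty buffer required by baddbmm's signature (beta=0 ignores its values).
    baddbmm_input = torch.empty(
        query.shape[0], query.shape[1], key.shape[1],
        dtype=query.dtype, device=query.device,
    )
    attention_scores = torch.baddbmm(
        baddbmm_input, query, key.transpose(-1, -2), beta=0, alpha=scale,
    )
    del baddbmm_input  # scores are computed; drop the large scratch buffer now

    if upcast_softmax:
        attention_scores = attention_scores.float()

    attention_probs = attention_scores.softmax(dim=-1)
    del attention_scores  # free the (possibly fp32) scores before the dtype cast

    return attention_probs.to(dtype)


if __name__ == "__main__" and torch.cuda.is_available():
    # Hypothetical shapes roughly matching a sliced attention call.
    q = torch.randn(8, 4096, 64, device="cuda", dtype=torch.float16)
    k = torch.randn(8, 4096, 64, device="cuda", dtype=torch.float16)
    torch.cuda.reset_peak_memory_stats()
    attention_probs(q, k, scale=64 ** -0.5)
    print(f"peak allocated: {torch.cuda.max_memory_allocated() / 2**20:.1f} MiB")
```

Commenting out the two `del` statements and rerunning should show a higher peak from `torch.cuda.max_memory_allocated()`, since the scratch buffer and the upcast scores would then stay alive across the subsequent large allocations.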