From b351be2379ed14f8599301a69739d7e59e220de1 Mon Sep 17 00:00:00 2001
From: Yahweasel
Date: Thu, 15 Jan 2026 02:10:57 -0500
Subject: [PATCH] LongCat Image pipeline: Allow offloading/quantization of
 text_encoder component (#12963)

* Don't attempt to move the text_encoder. Just move the generated_ids.

* The inputs to the text_encoder should be on its device.

---
 .../pipelines/longcat_image/pipeline_longcat_image.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/diffusers/pipelines/longcat_image/pipeline_longcat_image.py b/src/diffusers/pipelines/longcat_image/pipeline_longcat_image.py
index a758d545fa..ca28422f9c 100644
--- a/src/diffusers/pipelines/longcat_image/pipeline_longcat_image.py
+++ b/src/diffusers/pipelines/longcat_image/pipeline_longcat_image.py
@@ -260,10 +260,10 @@ class LongCatImagePipeline(DiffusionPipeline, FromSingleFileMixin):
             text = self.text_processor.apply_chat_template(message, tokenize=False, add_generation_prompt=True)
             all_text.append(text)
 
-        inputs = self.text_processor(text=all_text, padding=True, return_tensors="pt").to(device)
+        inputs = self.text_processor(text=all_text, padding=True, return_tensors="pt").to(self.text_encoder.device)
 
-        self.text_encoder.to(device)
         generated_ids = self.text_encoder.generate(**inputs, max_new_tokens=self.tokenizer_max_length)
+        generated_ids = generated_ids.to(device)
         generated_ids_trimmed = [out_ids[len(in_ids) :] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)]
         output_text = self.text_processor.batch_decode(
             generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
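
The pattern above generalizes to any pipeline component that may be offloaded
(e.g. via accelerate hooks) or quantized (e.g. with bitsandbytes), where
calling .to(device) on the module itself can error out or silently undo the
offload. Below is a minimal, self-contained sketch of the pattern with a toy
module, not diffusers code: ToyEncoder and encode are hypothetical names, and
the .device property mirrors the convention transformers models expose. It
also shows why the generated_ids move must be reassigned: Tensor.to() is not
in-place.

    import torch
    import torch.nn as nn


    class ToyEncoder(nn.Module):
        """Stand-in for a text encoder that may live on any device."""

        def __init__(self):
            super().__init__()
            self.proj = nn.Linear(8, 8)

        @property
        def device(self) -> torch.device:
            # transformers models expose .device the same way: derived
            # from the module's parameters.
            return next(self.parameters()).device

        def forward(self, x: torch.Tensor) -> torch.Tensor:
            return self.proj(x)


    def encode(encoder: ToyEncoder, inputs: torch.Tensor,
               exec_device: torch.device) -> torch.Tensor:
        # Move the *inputs* to the encoder's device instead of moving the
        # encoder, so offload hooks and quantized weights stay untouched.
        inputs = inputs.to(encoder.device)
        out = encoder(inputs)
        # Tensor.to() returns a new tensor; reassigning is what actually
        # lands the result on the pipeline's execution device.
        return out.to(exec_device)


    if __name__ == "__main__":
        encoder = ToyEncoder()  # stays wherever it was placed, e.g. on CPU
        exec_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        prompts = torch.randn(2, 8, device=exec_device)
        out = encode(encoder, prompts, exec_device)
        print(out.device)  # exec_device, regardless of where the encoder lives

Routing the tensors rather than the module is what lets this pipeline work
under enable_model_cpu_offload() or with a quantized text_encoder: the
component's placement is owned by whoever configured it, and the pipeline only
ever reads it.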