diff --git a/docs/source/en/_toctree.yml b/docs/source/en/_toctree.yml index 14dbfe3ea1..188b21f7f1 100644 --- a/docs/source/en/_toctree.yml +++ b/docs/source/en/_toctree.yml @@ -126,6 +126,8 @@ title: ComponentsManager - local: modular_diffusers/guiders title: Guiders + - local: modular_diffusers/custom_blocks + title: Building Custom Blocks - title: Training isExpanded: false diff --git a/docs/source/en/modular_diffusers/custom_blocks.md b/docs/source/en/modular_diffusers/custom_blocks.md index 8c7682ba6b..0b8815886b 100644 --- a/docs/source/en/modular_diffusers/custom_blocks.md +++ b/docs/source/en/modular_diffusers/custom_blocks.md @@ -13,11 +13,19 @@ specific language governing permissions and limitations under the License. # Building Custom Blocks -Modular Diffusers allows you to create custom blocks that can be used in a pipeline. This guide will show you how to create a custom block, define its inputs and outputs, and implement the computation logic. +Modular Diffusers allows you to create custom blocks that can be plugged into Modular Pipelines. This guide will show you how to create and use a custom block. -Let's create a custom block that uses the Florence2 model to process an input image and generate a mask for inpainting +First let's take a look at the structure of our custom block project: -First let's define a custom block in a file called `block.py`: +```shell +. +├── block.py +└── modular_config.json +``` + +The code to define the custom block lives in a file called `block.py`. The `modular_config.json` file contains metadata for loading the block with Modular Diffusers. 
+ +In this example, we will create a custom block that uses the Florence 2 model to process an input image and generate a mask for inpainting. ```py from typing import List, Union @@ -32,7 +40,7 @@ from diffusers.modular_pipelines import ( ComponentSpec, OutputParam, ) -from transformers import AutoProcessor, AutoModelForCausalLM +from transformers import AutoProcessor, Florence2ForConditionalGeneration class Florence2ImageAnnotatorBlock(ModularPipelineBlocks): @@ -41,13 +49,13 @@ class Florence2ImageAnnotatorBlock(ModularPipelineBlocks): return [ ComponentSpec( name="image_annotator", - type_hint=AutoModelForCausalLM, - repo="mrhendrey/Florence-2-large-ft-safetensors", + type_hint=Florence2ForConditionalGeneration, + repo="florence-community/Florence-2-base-ft", ), ComponentSpec( name="image_annotator_processor", type_hint=AutoProcessor, - repo="mrhendrey/Florence-2-large-ft-safetensors", + repo="florence-community/Florence-2-base-ft", ), ] @@ -93,12 +101,10 @@ class Florence2ImageAnnotatorBlock(ModularPipelineBlocks): required=True, default="mask_image", description="""Output type from annotation predictions. Availabe options are - annotation: - - raw annotation predictions from the model based on task type.
mask_image: -black and white mask image for the given image based on the task type mask_overlay: - - white mask overlayed on the original image + - mask overlayed on the original image bounding_box: - bounding boxes drawn on the original image """, @@ -159,7 +165,7 @@ class Florence2ImageAnnotatorBlock(ModularPipelineBlocks): ) return outputs - def prepare_mask(self, images, annotations, overlay=False): + def prepare_mask(self, images, annotations, overlay=False, fill="white"): masks = [] for image, annotation in zip(images, annotations): mask_image = image.copy() if overlay else Image.new("L", image.size, 0) @@ -172,7 +178,7 @@ class Florence2ImageAnnotatorBlock(ModularPipelineBlocks): if len(polygon) < 3: continue polygon = polygon.reshape(-1).tolist() - draw.polygon(polygon, fill="white") + draw.polygon(polygon, fill=fill) elif "bbox" in _annotation: bbox = _annotation["bbox"] @@ -218,6 +224,7 @@ class Florence2ImageAnnotatorBlock(ModularPipelineBlocks): block_state.image, block_state.annotation_prompt ) task = block_state.annotation_task + fill = block_state.fill annotations = self.get_annotations( components, images, annotation_task_prompt, task @@ -229,7 +236,7 @@ class Florence2ImageAnnotatorBlock(ModularPipelineBlocks): block_state.mask_image = None if block_state.annotation_output_type == "mask_overlay": - block_state.image = self.prepare_mask(images, annotations, overlay=True) + block_state.image = self.prepare_mask(images, annotations, overlay=True, fill=fill) elif block_state.annotation_output_type == "bounding_box": block_state.image = self.prepare_bounding_boxes(images, annotations) @@ -239,7 +246,7 @@ class Florence2ImageAnnotatorBlock(ModularPipelineBlocks): return components, state ``` -Once we have defined our custom block, we can save it as a model repo so that we can easily reuse it. +Now that we have defined our custom block, we can save it as a model repository on the Hugging Face Hub so that it is easy to share and reuse.
There are two ways to save the block: @@ -275,7 +282,7 @@ from diffusers.modular_pipelines.stable_diffusion_xl import INPAINT_BLOCKS from diffusers.utils import load_image # Fetch the Florence2 image annotator block that will create our mask -image_annotator_block = ModularPipelineBlocks.from_pretrained("diffusers/florence2-image-annotator", trust_remote_code=True) +image_annotator_block = ModularPipelineBlocks.from_pretrained("diffusers/florence-2-custom-block", trust_remote_code=True) my_blocks = INPAINT_BLOCKS.copy() # insert the annotation block before the image encoding step @@ -284,7 +291,7 @@ my_blocks.insert("image_annotator", image_annotator_block, 1) # Create our initial set of inpainting blocks blocks = SequentialPipelineBlocks.from_blocks_dict(my_blocks) -repo_id = "diffusers-internal-dev/modular-sdxl-inpainting" +repo_id = "diffusers/modular-stable-diffusion-xl-base-1.0" pipe = blocks.init_pipeline(repo_id) pipe.load_components(torch_dtype=torch.float16, device_map="cuda", trust_remote_code=True)