divisor.dimoo.prompt_utils

Prompt generation utilities for different inference types

 1"""
 2Prompt generation utilities for different inference types
 3"""
 4
 5from typing import Dict, List, Tuple, Optional
 6
 7
 8def create_prompt_templates() -> dict:
 9    """Create prompt templates for various tasks"""
10    templates = {
11        "text_to_text": "Generate text according to the text prompt",
12        "text_understanding": "You are a multimodal model that can process both text and images. Answer the following question based on the provided images. Analyze each image and combine relevant details to answer.",
13        "image_generation": "Generate an image according to the text prompt.",
14        "image_editing": "Generate an image applying the following editing instruction based on the original image.",
15        "dense_prediction": "Perform dense prediction on the given images.",
16        "control_generation": "Generate an image according to the text prompt and the given control image.",
17        "subject_generation": "Generate an image according to the text prompt and the given object image.",
18        "multi_view": "Generate a view-image based on the given image.",
19        "style_transfer": "Transform the current image into the style of the provided image.",
20    }
21    return templates
22
23
def generate_multimodal_understanding_prompt(question: str, templates: Optional[Dict] = None) -> str:
    """Build the input prompt for multimodal understanding (MMU).

    :param question: User question; placed verbatim between the ``<user>`` tags
    :param templates: Prompt templates dict; when ``None``, the dict returned
        by ``create_prompt_templates()`` is used
    :return: Prompt of the form ``<system>...</system><user>...</user>``
    """
    # Only an explicit None triggers the fallback; any dict the caller
    # passes (even an empty one) is used as-is.
    active_templates = create_prompt_templates() if templates is None else templates

    system_block = "<system>" + active_templates["text_understanding"] + "</system>"
    user_block = "<user>" + question + "</user>"
    return system_block + user_block
37
38
def generate_text_prompt(question: str, templates: Optional[dict] = None) -> str:
    """Generate prompt for text to text generation.

    The ``templates`` default is ``None`` rather than a call to
    ``create_prompt_templates()``: a call in the signature is evaluated once,
    when the function is defined, and the resulting dict would then be shared
    (and mutable) across every call. Resolving it lazily also matches
    ``generate_multimodal_understanding_prompt``.

    :param question: User question; placed verbatim between the ``<user>`` tags
    :param templates: Optional prompt templates dict; when ``None``, the dict
        returned by ``create_prompt_templates()`` is used
    :return: Formatted input prompt of the form
        ``<system>...</system><user>...</user>``
    """
    if templates is None:
        templates = create_prompt_templates()

    system_prompt = templates["text_to_text"]
    input_prompt = "<system>" + system_prompt + "</system>" + "<user>" + question + "</user>"

    return input_prompt
def create_prompt_templates() -> dict:
    """Build the mapping from task name to system-prompt text.

    A new dict is constructed on every call, so the caller owns the
    returned object.

    :return: Dict keyed by task name (e.g. ``"text_to_text"``,
        ``"text_understanding"``, ``"image_generation"``) whose values are
        the system-prompt strings for that task.
    """
    return dict(
        text_to_text="Generate text according to the text prompt",
        text_understanding=(
            "You are a multimodal model that can process both text and images. "
            "Answer the following question based on the provided images. "
            "Analyze each image and combine relevant details to answer."
        ),
        image_generation="Generate an image according to the text prompt.",
        image_editing=(
            "Generate an image applying the following editing instruction "
            "based on the original image."
        ),
        dense_prediction="Perform dense prediction on the given images.",
        control_generation=(
            "Generate an image according to the text prompt "
            "and the given control image."
        ),
        subject_generation=(
            "Generate an image according to the text prompt "
            "and the given object image."
        ),
        multi_view="Generate a view-image based on the given image.",
        style_transfer="Transform the current image into the style of the provided image.",
    )

Create prompt templates for various tasks

def generate_multimodal_understanding_prompt(question: str, templates: Optional[Dict] = None) -> str:
    """Build the input prompt for multimodal understanding (MMU).

    :param question: User question; placed verbatim between the ``<user>`` tags
    :param templates: Prompt templates dict; when ``None``, the dict returned
        by ``create_prompt_templates()`` is used
    :return: Prompt of the form ``<system>...</system><user>...</user>``
    """
    # Only an explicit None triggers the fallback; any dict the caller
    # passes (even an empty one) is used as-is.
    active_templates = create_prompt_templates() if templates is None else templates

    system_block = "<system>" + active_templates["text_understanding"] + "</system>"
    user_block = "<user>" + question + "</user>"
    return system_block + user_block

Generate prompt for multimodal understanding (MMU)

Parameters
  • question: User question about the image
  • templates: Optional prompt templates dict
Returns

Formatted input prompt

def generate_text_prompt(question: str, templates: Optional[dict] = None) -> str:
    """Generate prompt for text to text generation.

    The ``templates`` default is ``None`` rather than a call to
    ``create_prompt_templates()``: a call in the signature is evaluated once,
    when the function is defined, and the resulting dict would then be shared
    (and mutable) across every call. Resolving it lazily also matches
    ``generate_multimodal_understanding_prompt``.

    :param question: User question; placed verbatim between the ``<user>`` tags
    :param templates: Optional prompt templates dict; when ``None``, the dict
        returned by ``create_prompt_templates()`` is used
    :return: Formatted input prompt of the form
        ``<system>...</system><user>...</user>``
    """
    if templates is None:
        templates = create_prompt_templates()

    system_prompt = templates["text_to_text"]
    input_prompt = "<system>" + system_prompt + "</system>" + "<user>" + question + "</user>"

    return input_prompt

Generate prompt for text to text generation

Parameters
  • question: User question
  • templates: Optional prompt templates dict
Returns

Formatted input prompt