@@ -129,6 +129,8 @@
 )
 if model_type == "llama" and args.vision_text_model:
     model_type = "mllama"
+if model_type == "maira-2":
+    model_type = "maira2"
 model_class = MODEL_CLASSES[model_type]
 if args.config_file is None:
     if model_type == "chatglm":
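
Note: the remapping above normalizes the hyphenated spelling (presumably how the model id or its config reports the type) to the unhyphenated key used by the script's MODEL_CLASSES table. A minimal sketch of the dispatch this enables; the table entry below is an assumption for illustration, not the script's real table:

    from transformers import AutoModelForCausalLM, AutoProcessor

    # Hypothetical shape of the dispatch table; the real MODEL_CLASSES maps
    # model_type -> (model class, tokenizer/processor class).
    MODEL_CLASSES = {"maira2": (AutoModelForCausalLM, AutoProcessor)}

    model_type = "maira-2"
    if model_type == "maira-2":
        model_type = "maira2"  # normalize the hyphenated name to the table key
    model_class = MODEL_CLASSES[model_type]
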
@@ -161,13 +163,15 @@
 
 if not hasattr(config, "lm_head_generation"):
     config.lm_head_generation = True
+if model_type == "maira2" and not hasattr(config.text_config, "lm_head_generation"):
+    config.text_config.lm_head_generation = True
 
 if model_type != "llava":
     model = model_class[0].from_pretrained(
         args.model_id,
         torch_dtype=amp_dtype,
         config=config,
-        low_cpu_mem_usage=True,
+        low_cpu_mem_usage=True if model_type != "maira2" else False,
         trust_remote_code=True,
     )
     tokenizer = model_class[1].from_pretrained(args.model_id, trust_remote_code=True)
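
Note: MAIRA-2 is a composite checkpoint (vision encoder plus language model) loaded via trust_remote_code, so the patch mirrors lm_head_generation onto the nested text_config and disables low_cpu_mem_usage, presumably because meta-device initialization is not reliable for this remote-code model. A standalone sketch of the same loading path; the checkpoint id and dtype are assumptions:

    import torch
    from transformers import AutoModelForCausalLM, AutoProcessor

    model_id = "microsoft/maira-2"  # assumed checkpoint id
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        torch_dtype=torch.bfloat16,
        low_cpu_mem_usage=False,  # skip meta-device init for this remote-code model
        trust_remote_code=True,
    )
    processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
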
@@ -228,6 +232,14 @@ def load_image(image_file):
         raw_image = Image.open(image_file)
         return raw_image
 
+elif re.search("maira2", model.config.architectures[0], re.IGNORECASE):
+    from PIL import Image
+    import requests
+
+    def download_and_open(url: str) -> Image.Image:
+        response = requests.get(url, headers={"User-Agent": "MAIRA-2"}, stream=True)
+        return Image.open(response.raw)
+
 
 if re.search("llava", model.config.architectures[0], re.IGNORECASE):
     model_name = get_model_name_from_path(args.model_id)
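
The helper streams the image rather than buffering the whole response: with stream=True, response.raw is a file-like object that PIL can decode directly, and the explicit User-Agent avoids the 403 responses some image hosts return to default client agents. A usage sketch for the patch's helper; the URL is a placeholder, not from the patch:

    # Placeholder URL for illustration only.
    frontal = download_and_open("https://example.com/chest_xray_frontal.png")
    print(frontal.size, frontal.mode)
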
@@ -305,6 +317,14 @@ def trace_handler(prof):
 elif model_type == "whisper":
     prompt = sample[0]
     generate_kwargs.pop("min_new_tokens", None)
+elif model_type == "maira2":
+    prompt = args.prompt
+    sample = download_and_open(args.image_url)
+    process_input_func = (
+        tokenizer.process_reporting_input
+        if hasattr(tokenizer, "process_reporting_input")
+        else tokenizer.format_and_preprocess_reporting_input
+    )
 else:
     # input prompt
     current_path = pathlib.Path(__file__).parent.resolve()
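
The hasattr probe keeps the script working across revisions of the MAIRA-2 remote-code processor, which has evidently exposed its reporting preprocessor under both names. The same shim written generically; the function name resolve_processing_fn is illustrative:

    def resolve_processing_fn(processor):
        # Prefer the newer method name, fall back to the older one;
        # these are the two names the patch probes for.
        for name in ("process_reporting_input", "format_and_preprocess_reporting_input"):
            fn = getattr(processor, name, None)
            if callable(fn):
                return fn
        raise AttributeError("no reporting-input preprocessor found on this processor")
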
@@ -375,12 +395,30 @@ def trace_handler(prof):
         inputs = tokenizer(raw_image, prompt, return_tensors="pt")
         input_ids = inputs["input_ids"]
         output = model.generate(**inputs, **generate_kwargs)
+    elif model_type == "maira2":
+        processed_inputs = process_input_func(
+            current_frontal=sample,
+            current_lateral=None,
+            prior_frontal=None,
+            indication=None,
+            technique=None,
+            comparison=None,
+            prior_report=None,
+            return_tensors="pt",
+            get_grounding=False,
+        )
+        input_ids = processed_inputs["input_ids"]
+        output = model.generate(**processed_inputs, **generate_kwargs)
     else:
         input_ids = tokenizer(prompt, return_tensors="pt").input_ids
         output = model.generate(input_ids, **generate_kwargs)
     gen_ids = output[0] if args.token_latency else output
     gen_text = tokenizer.batch_decode(
-        gen_ids[:, input_ids.shape[1] :] if model_type == "llava" else gen_ids,
+        (
+            gen_ids[:, input_ids.shape[1] :]
+            if model_type in ["llava", "maira2"]
+            else gen_ids
+        ),
         skip_special_tokens=True,
     )
     toc = time.time()
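
The widened decode slice reflects that maira2, like llava, echoes the prompt: generate() returns the prompt tokens followed by the new tokens, so the generated report is everything past input_ids.shape[1]. A toy illustration of the slice:

    import torch

    input_ids = torch.tensor([[1, 2, 3, 4]])         # 4 prompt tokens
    gen_ids = torch.tensor([[1, 2, 3, 4, 9, 8, 7]])  # prompt echoed + 3 new tokens
    new_tokens = gen_ids[:, input_ids.shape[1]:]
    assert new_tokens.tolist() == [[9, 8, 7]]
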
@@ -441,6 +479,19 @@ def trace_handler(prof):
         raw_image = [load_image(args.image_url)] * args.batch_size
         inputs = tokenizer(raw_image, prompt, return_tensors="pt")
         output = model.generate(**inputs, **generate_kwargs)
+    elif model_type == "maira2":
+        processed_inputs = process_input_func(
+            current_frontal=sample,
+            current_lateral=None,
+            prior_frontal=None,
+            indication=None,
+            technique=None,
+            comparison=None,
+            prior_report=None,
+            return_tensors="pt",
+            get_grounding=False,
+        )
+        output = model.generate(**processed_inputs, **generate_kwargs)
     else:
         input_ids = tokenizer(prompt, return_tensors="pt").input_ids
         output = model.generate(input_ids, **generate_kwargs)
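
Putting the pieces together, an end-to-end sketch of the MAIRA-2 path this patch wires up; the checkpoint id, image URL, and generation settings are illustrative assumptions, and download_and_open is the helper defined above:

    import torch
    from transformers import AutoModelForCausalLM, AutoProcessor

    model_id = "microsoft/maira-2"  # assumed checkpoint id
    processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        torch_dtype=torch.bfloat16,
        low_cpu_mem_usage=False,
        trust_remote_code=True,
    ).eval()

    frontal = download_and_open("https://example.com/frontal.png")  # placeholder URL
    process_input_func = (
        processor.process_reporting_input
        if hasattr(processor, "process_reporting_input")
        else processor.format_and_preprocess_reporting_input
    )
    inputs = process_input_func(
        current_frontal=frontal,
        current_lateral=None,
        prior_frontal=None,
        indication=None,
        technique=None,
        comparison=None,
        prior_report=None,
        return_tensors="pt",
        get_grounding=False,
    )
    with torch.no_grad():
        output = model.generate(**inputs, max_new_tokens=128)
    report = processor.batch_decode(
        output[:, inputs["input_ids"].shape[1]:], skip_special_tokens=True
    )[0]
    print(report)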