ComfyUI_StoryDiffusion/Storydiffusion_node.py at main · smthemex/ComfyUI_StoryDiffusion · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
 # !/usr/bin/env python
# -*- coding: UTF-8 -*-
import random
import logging
import numpy as np
import torch
import os
from PIL import ImageFont,Image
import torch.nn.functional as F
import copy
from pathlib import PureWindowsPath
from tqdm import tqdm
from .utils.utils import get_comic
from .model_loader_utils import  (phi2narry,replicate_data_by_indices,get_float,gc_cleanup,tensor_to_image,photomaker_clip,tensortopil_list_upscale,tensortopil_list,extract_content_from_brackets_,
                                  narry_list_pil,pre_text2infer,cf_clip,get_phrases_idx_cf,get_eot_idx_cf,get_ms_phrase_emb,get_extra_function,photomaker_clip_v2,adjust_indices,load_clip_clipvsion,
                                  get_scheduler,apply_style_positive,load_lora_for_unet_only,tensortolist,
                                  nomarl_upscale,SAMPLER_NAMES,SCHEDULER_NAMES,lora_lightning_list)

from .utils.gradio_utils import cal_attn_indice_xl_effcient_memory,is_torch2_available
from .ip_adapter.attention_processor import IPAttnProcessor2_0
if is_torch2_available():
    from .utils.gradio_utils import AttnProcessor2_0 as AttnProcessor
else:
    from .utils.gradio_utils import AttnProcessor

import folder_paths
from comfy.model_management import total_vram
import comfy
import latent_preview


device = "cuda" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu"
MAX_SEED = np.iinfo(np.int32).max
dir_path = os.path.dirname(os.path.abspath(__file__))


weigths_gguf_current_path = os.path.join(folder_paths.models_dir, "gguf")
if not os.path.exists(weigths_gguf_current_path):
    os.makedirs(weigths_gguf_current_path)
folder_paths.add_model_folder_path("gguf", weigths_gguf_current_path) # use gguf dir

global total_count, attn_count_, cur_step, mask1024, mask4096, attn_procs_, unet_,sa32, sa64,write,height_s, width_s

infer_type_g=torch.float16 if device=="cuda" else torch.float32 #TODO


class EasyFunction_Lite:
    @classmethod
    def INPUT_TYPES(s):
        return {"required": {
                            "repo1": ("STRING", { "default": ""}),
                            "repo2": ("STRING", { "default": ""}),
                            "unet":(["none"] +folder_paths.get_filename_list("diffusion_models"),),
                            "gguf": (["none"] + folder_paths.get_filename_list("gguf"),),
                            "clip1": (["none"] + folder_paths.get_filename_list("clip"),),
                            "clip2": (["none"] + folder_paths.get_filename_list("clip"),),
                            "clip_vision1": (["none"] + folder_paths.get_filename_list("clip_vision"),),
                            "clip_vision2": (["none"] + folder_paths.get_filename_list("clip_vision"),),
                            "lora1": (["none"] +folder_paths.get_filename_list("loras"),),
                            "lora2": (["none"] +folder_paths.get_filename_list("loras"),),
                            "controlnet":(["none"] +folder_paths.get_filename_list("controlnet"),),
                            "special_mode": (["none","tag", "glm"],),
                            "tag_temperature": (
                                 "FLOAT", {"default": 0.7, "min": 0.1, "max": 1.0, "step": 0.1, "round": 0.01}),
                             }}

    RETURN_TYPES = ("MODEL","CLIP","STORY_CONDITIONING_1")
    RETURN_NAMES = ("model","clip","info")
    FUNCTION = "easy_function_main"
    CATEGORY = "Storydiffusion"

    def easy_function_main(self, repo1,repo2,unet,gguf,clip1,clip2,clip_vision1,clip_vision2,lora1,lora2,controlnet,special_mode,tag_temperature):

        repo1_path=PureWindowsPath(repo1).as_posix() if repo1 else None
        repo2_path=PureWindowsPath(repo2).as_posix() if repo2 else None
        gguf_path=folder_paths.get_full_path("gguf", gguf) if gguf != "none" else None
        unet_path=folder_paths.get_full_path("unet", unet) if unet != "none" else None
        clip1_path=folder_paths.get_full_path("clip", clip1) if clip1 != "none" else None
        clip2_path=folder_paths.get_full_path("clip", clip2) if clip2 != "none" else None
        lora1_path  =folder_paths.get_full_path("loras", lora1) if lora1 != "none" else None
        lora2_path  =folder_paths.get_full_path("loras", lora2) if lora2 != "none" else None
        clip_vision1_path=folder_paths.get_full_path("clip_vision", clip_vision1) if clip_vision1 != "none" else None
        clip_vision2_path=folder_paths.get_full_path("clip_vision", clip_vision2) if clip_vision2 != "none" else None
        controlnet_path=folder_paths.get_full_path("controlnet", controlnet) if controlnet != "none" else None

        clip_glm,tag_model,pipe,svdq_repo=None,None,None,None

        repo_list=[i for i in [repo1_path,repo2_path] if i is not None]

        if repo1_path is not None or repo2_path is not None:

            find_svdq=[i for i in repo_list if "svdq" in i ]
            svdq_repo=find_svdq[0] if find_svdq else None


        if special_mode=="tag":
            from .model_loader_utils import StoryLiteTag
            tag_model = StoryLiteTag(device, tag_temperature,repo2_path, repo1_path) #No repo will load default

        elif special_mode=="glm": #kolor only
            from .model_loader_utils import GLM_clip
            if clip1_path is not None:
                clip_glm=GLM_clip(dir_path,clip1_path)
            elif clip2_path is not None and clip1_path is None:
                clip_glm=GLM_clip(dir_path,clip2_path)
            else:
                clip_glm=None
        else:
            pass


        info={"gguf_path":gguf_path,"unet_path":unet_path,"tag_model":tag_model,"clip_vision1_path":clip_vision1_path,"controlnet_path":controlnet_path,
              "clip1_path":clip1_path,"clip2_path":clip2_path,"repo1_path":repo1_path,"repo2_path":repo2_path,"svdq_repo":svdq_repo,
              "lora1_path":lora1_path,"lora2_path":lora2_path,"clip_vision2_path":clip_vision2_path,}

        return (pipe,clip_glm,info)


class StoryDiffusion_Apply:
    @classmethod
    def INPUT_TYPES(s):
        return {"required":
                {
                    "model": ("MODEL",),
                    "vae": ("VAE",),
                    "infer_mode": (["story", "classic","flux_pulid","infiniteyou","uno","realcustom","instant_character","dreamo","qwen_image","flux_omi","bagel_edit","story_maker","story_and_maker","consistory","kolor_face","msdiffusion" ],),
                    "photomake_ckpt": (["none"] + [i for i in folder_paths.get_filename_list("photomaker") if "v1" in i or "v2" in i],),
                    "ipadapter_ckpt": (["none"] + folder_paths.get_filename_list("photomaker"),),
                    "quantize_mode": ([ "fp8", "nf4","fp16", ],),
                    "lora_scale": ("FLOAT", {"default": 0.8, "min": 0.1, "max": 1.0, "step": 0.1}),
                    "extra_function":("STRING", {"default": ""}),
                            },
                "optional":{

                    "info": ("STORY_CONDITIONING_1",),
                    "CLIP_VISION": ("CLIP_VISION",),
                            }
                }

    RETURN_TYPES = ("MODEL","DIFFCONDI",)
    RETURN_NAMES = ("model","switch",)
    FUNCTION = "main_apply"
    CATEGORY = "Storydiffusion"

    def main_apply(self,model,vae,infer_mode,photomake_ckpt,ipadapter_ckpt,quantize_mode,lora_scale,extra_function, **kwargs):
        print(f"infer model is {infer_mode}")
        extra_info=kwargs.get("info",{})

        # pre data
        photomake_ckpt_path = None if photomake_ckpt == "none"  else  folder_paths.get_full_path("photomaker", photomake_ckpt)
        ipadapter_ckpt_path = None if ipadapter_ckpt == "none"  else  folder_paths.get_full_path("photomaker", ipadapter_ckpt)

        if extra_function:
            extra_function=PureWindowsPath(extra_function).as_posix()

        save_quantezed=True if "save" in extra_function and quantize_mode=="fp8" else False

        clip_vision1_path=extra_info.get("clip_vision1_path") if extra_info else None
        clip_vision2_path=extra_info.get("clip_vision2_path") if extra_info else None
        repo1_path=extra_info.get("repo1_path",None)
        repo2_path=extra_info.get("repo2_path",None)
        lora1_path=extra_info.get("lora1_path") if extra_info else None
        lora2_path=extra_info.get("lora2_path") if extra_info else None
        unet_path=extra_info.get("unet_path") if extra_info else None
        gguf_path=extra_info.get("gguf_path") if extra_info else None

        repo_list=[i for i  in [repo1_path,repo2_path] if i is not None]
        lora_list=[i for i in [lora1_path,lora2_path] if i is not None]

        dreamo_version="v1.0" if "v1.0" in extra_function else "v1.1"

        vae_encoder,vae_downsample_factor,vae_config,vision_model_config_ar,image_proj_model,no_dif_quantization,find_Kolors=None,None,None,None,None,False,None

        # per clip vision
        CLIP_VISION=kwargs.get("CLIP_VISION")
        unet_type=torch.float16 #use for sdxl

        if infer_mode=="flux_pulid" or infer_mode=="kolor_face":# 2种加载clip vision的方式
            from comfy.clip_vision import load as clip_load
            if CLIP_VISION is not None:
                clip_vision_path=CLIP_VISION
            elif  clip_vision1_path is not None:
                clip_vision_path=clip_load(clip_vision1_path).model
            elif clip_vision2_path is not None:
                clip_vision_path=clip_load(clip_vision2_path).model
            else:
                if infer_mode=="kolor_face":
                    pass
                else:
                    raise ValueError("Please specify one of CLIP_VISION or clip_vision1_path or clip_vision2_path")

        if infer_mode=="msdiffusion" or infer_mode in ["story_maker" ,"story_and_maker"]:
            if CLIP_VISION is not None:
                pass
            else:
                from comfy.clip_vision import load as clip_load
                if  clip_vision1_path is not None:
                    CLIP_VISION=clip_load(clip_vision1_path).model
                elif clip_vision2_path is not None:
                    CLIP_VISION=clip_load(clip_vision2_path).model
                else:
                    raise ValueError("Please provide a CLIP_VISION or CLIP_VISION1 or CLIP_VISION2,Msdiffusion need a clipvison g model,story_maker need a clipvison H model")

        # pre dreamo lora
        if infer_mode =="dreamo" and  lora1_path is not None and lora2_path is not None:
            if "distill" in lora1_path.lower():
                cfg_distill_path=lora1_path
                dreamo_lora_path=lora2_path
            else:
                cfg_distill_path=lora2_path
                dreamo_lora_path=lora1_path
        else:
            cfg_distill_path=None
            dreamo_lora_path=None


        if infer_mode in ["story_maker" ,"story_and_maker"] and ipadapter_ckpt_path is None:
             raise "story_maker need a mask.bin"

        # check vram only using in flux pulid or UNO
        if total_vram > 45000.0:
            aggressive_offload = False
            offload = False
        elif 17000.0 < total_vram < 45000.0:
            aggressive_offload = False
            offload = True
        else:
            aggressive_offload = True
            offload = True
        edit_mode=None
        logging.info(f"total_vram is {total_vram},aggressive_offload is {aggressive_offload},offload is {offload}")

        if infer_mode in["story", "story_and_maker","msdiffusion"]:# mix mode,use maker or ms to make 2 roles in on image
            from .model_loader_utils import Loader_storydiffusion
            model = Loader_storydiffusion(model,photomake_ckpt_path,vae)
        elif infer_mode =="story_maker":
            from .model_loader_utils import Loader_story_maker
            model = Loader_story_maker(model,ipadapter_ckpt_path,vae,False,lora_scale)
        elif infer_mode == "flux_pulid":
            from .PuLID.app_flux import get_models
            from .model_loader_utils import Loader_Flux_Pulid
            if unet_path is None:
                raise "PuLID can't link a normal comfyui model,you need load a flux unet model "
            model_=get_models("flux-dev",unet_path,False,aggressive_offload,device=device,offload=offload,quantized_mode=quantize_mode,)
            model = Loader_Flux_Pulid(model_,model,ipadapter_ckpt_path,quantize_mode,aggressive_offload,offload,False,clip_vision_path)
        elif infer_mode == "infiniteyou":
            from .model_loader_utils import Loader_InfiniteYou
            assert extra_info ,"you need to provide extra_info"
            model,image_proj_model = Loader_InfiniteYou(extra_info,vae,quantize_mode)
        elif infer_mode == "consistory":
            from .model_loader_utils import load_pipeline_consistory
            model = load_pipeline_consistory(model,vae)
        elif infer_mode == "instant_character":
            from .model_loader_utils import load_pipeline_instant_character
            assert extra_info ,"you need to provide extra_info"
            model = load_pipeline_instant_character(extra_info,ipadapter_ckpt_path,vae,quantize_mode)
        elif infer_mode == "realcustom":
            from .model_loader_utils import load_pipeline_realcustom,load_realcustom_vae
            if ipadapter_ckpt_path is None:
                raise "realcustom need a realcustom model which in photomaker folder, and  chocie it in ipadapter_ckpt_path"
            model,vision_model_config_ar,_ = load_pipeline_realcustom(model,ipadapter_ckpt_path)
            vae_encoder,vae_downsample_factor,vae_config=load_realcustom_vae(vae,device)
        elif infer_mode == "kolor_face":
            from .model_loader_utils import Loader_KOLOR

            find_Kolors =[i for i in repo_list if  "kolor"  in i.lower()]
            if not find_Kolors:
                raise ValueError("No Kolor model found in the repo")
            model = Loader_KOLOR(find_Kolors[0],clip_vision_path,ipadapter_ckpt_path)
        elif infer_mode == "uno":
            from .model_loader_utils import Loader_UNO
            model = Loader_UNO(extra_info,offload,quantize_mode,save_quantezed,lora_rank=512)
        elif infer_mode == "dreamo":
            from.model_loader_utils import Loader_Dreamo
            if dreamo_lora_path is None or cfg_distill_path is None or ipadapter_ckpt_path is None:
                raise "dreamo need a dreamo lora and cfg distill and turbo lora in ipadapter menu"
            model = Loader_Dreamo(extra_info,vae,quantize_mode,dreamo_lora_path,cfg_distill_path,ipadapter_ckpt_path,device,dreamo_version)
        elif infer_mode == "bagel_edit":
            from .Bagel.app import load_bagel_model

            if not repo_list :
                raise "EasyFunction_Lite node repo1 or repo2 must fill bagel repo"
            max_mem_per_gpu=str(int(total_vram/1000))+"GIB"
            model = load_bagel_model(repo_list[0],quantize_mode,max_mem_per_gpu)
        elif infer_mode == "flux_omi":
            from .model_loader_utils import Loader_Flux_Diffuser

            no_dif_quantization=True if extra_info.get("unet_path") or extra_info.get("svdq_repo") or extra_info.get("gguf_path") else False
            model = Loader_Flux_Diffuser(extra_info,ipadapter_ckpt_path,vae,quantize_mode)
        elif infer_mode == "qwen_image":
            from.qwen_image.inferencer import load_quwen_image

            df_repo=repo_list[0] if repo_list else None

            model,edit_mode = load_quwen_image(cpu_offload=True,cpu_offload_blocks=16,no_pin_memory=True, dir_path =dir_path, repo=df_repo,unet_path=unet_path,gguf_path=gguf_path,lora_path=lora_list[0] if lora_list else None)
        else:  # can not choice a mode
            print("infer use comfyui classic mode")

        story_img=True if photomake_ckpt_path and infer_mode in["story","story_maker","story_and_maker","msdiffusion"] else False
        model_=model if infer_mode=="flux_pulid" or story_img else None
        switch={"infer_mode":infer_mode,"ipadapter_ckpt_path":ipadapter_ckpt_path,"photomake_ckpt_path":photomake_ckpt_path,
                "vision_model_config_ar":vision_model_config_ar,"no_dif_quantization":no_dif_quantization,"edit_mode":edit_mode,
                       "lora_scale":lora_scale,"image_proj_model":image_proj_model, "vae_encoder":vae_encoder,"vae_downsample_factor":vae_downsample_factor,"vae_config":vae_config,"dreamo_version":dreamo_version,
                       "CLIP_VISION":CLIP_VISION,"VAE":vae,"find_Kolors":find_Kolors,"model_":model_,"unet_type":unet_type,"extra_function":extra_function,}
        switch.update(extra_info)
        return (model,switch,)


class StoryDiffusion_CLIPTextEncode:
    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "clip": ("CLIP",),
                "switch": ("DIFFCONDI", ),
                "width": ("INT", {"default": 768, "min": 256, "max": 2048, "step": 16, "display": "number"}),
                "height": ("INT", {"default": 768, "min": 256, "max": 2048, "step": 16, "display": "number"}),
                "role_text": ("STRING", {"multiline": True,"default": "[Taylor] a woman img, wearing a white T-shirt, blue loose hair.\n""[Lecun] a man img,wearing a suit,black hair."}),
                "scene_text":("STRING", {"multiline": True,
                                             "default": "[Taylor] wake up in the bed ;\n[Taylor] have breakfast by the window;\n[Lecun] driving a car;\n[Lecun] is working."}),
                "pos_text": ("STRING", {"multiline": True,"default": ",best"}),
                "neg_text": ("STRING", {"multiline": True,
                                               "default": "bad anatomy, bad hands, missing fingers, extra fingers,three hands, three legs, bad arms, missing legs, missing arms, poorly drawn "
                                                          "face, bad face, fused face, cloned face, three crus, fused feet, fused thigh, extra crus, ugly fingers, horn,amputation, disconnected limbs"}),
                "lora_trigger_words": ("STRING", {"default": "best quality"}),
                "add_style": (["No_style", "Realistic", "Japanese_Anime", "Digital_Oil_Painting", "Pixar_Disney_Character","Photographic", "Comic_book","Line_art", "Black_and_White_Film_Noir", "Isometric_Rooms"],),
                "mask_threshold": (
                    "FLOAT", {"default": 0.5, "min": 0.0, "max": 1.0, "step": 0.1, "round": 0.01}),
                "extra_param":("STRING", {"default": ""}),
                "guidance_list": ("STRING", {"multiline": True, "default": "0., 0.25, 0.4, 0.75;0.6, 0.25, 1., 0.75"}),
            },
            "optional": {
                         "image":("IMAGE",),
                         "control_image":("IMAGE",),
                         }
        }
    RETURN_TYPES = ("CONDITIONING","CONDITIONING","DIFFINFO","INT","INT",)
    RETURN_NAMES = ("positive", "negative","condition","width","height",)
    FUNCTION = "encode"
    CATEGORY = "Storydiffusion"


    def encode(self, clip,switch,width,height, role_text,scene_text,pos_text,neg_text,lora_trigger_words,add_style,mask_threshold,extra_param,guidance_list,**kwargs):
        infer_mode=switch.get("infer_mode")
        CLIP_VISION = switch.get("CLIP_VISION")
        extra_function=switch.get("extra_function")
        photomake_ckpt_path=switch.get("photomake_ckpt_path")
        unet_type=switch.get("unet_type")
        model_=switch.get("model_")
        vae=switch.get("VAE")


        image=kwargs.get("image",None)
        control_image=kwargs.get("control_image",None)

        lora_list=[i for i in [switch.get("lora1_path"),switch.get("lora2_path")] if i is not None]
        repo_list=[i for i in [switch.get("repo1_path"),switch.get("repo2_path")] if i is not None]

        use_lora =True if lora_list else False


        if extra_function:
            extra_function=PureWindowsPath(extra_function).as_posix()
        tag_list,text_model,vision_model,siglip_path,dino_path=None,None,None,None,None,

        # 反推功能
        if switch.get("tag_model") is not None and isinstance(image,torch.Tensor):
            tag_img_list=tensortopil_list(image)
            tag_list=[]
            for i in tag_img_list:
                tag_text=switch.get("tag_model").run_tag(i)
                tag_list.append(tag_text)


        siglip_path_=switch.get("clip_vision1_path")
        dino_path_=switch.get("clip_vision2_path")
        clip1_path=switch.get("clip1_path")
        clip2_path=switch.get("clip2_path")


        siglip_path, dino_path = siglip_path_, dino_path_

        if (siglip_path_ is not None and dino_path_ is not None and
            "sig" in dino_path_ and "dino" in siglip_path_):
            siglip_path, dino_path = dino_path_, siglip_path_i


        if infer_mode == "realcustom" and (siglip_path is None or dino_path is None):
            print ("if use realcustom mode, u must linke add_function to your node,if not ,will auto load clip_vision_path and clip_path")
            siglip_path="" if siglip_path is None else siglip_path
            dino_path="" if dino_path is None else dino_path


        auraface,use_photov2,img2img_mode,cached,inject,onnx_provider,dreamo_mode,trigger_words_dual,dual_lora_scale=get_extra_function(extra_function,extra_param,photomake_ckpt_path,image,infer_mode)


        (replace_prompts,role_index_dict,invert_role_index_dict,ref_role_index_dict,ref_role_totals,role_list,role_dict,
         nc_txt_list,nc_indexs,positions_index_char_1,positions_index_char_2,positions_index_dual,prompts_dual,index_char_1_list,index_char_2_list)=pre_text2infer(role_text,scene_text,lora_trigger_words,use_lora,tag_list)


        global character_index_dict, invert_character_index_dict, cur_character, ref_indexs_dict, ref_totals, character_dict
        character_index_dict=role_index_dict
        invert_character_index_dict=invert_role_index_dict
        ref_indexs_dict=ref_role_index_dict
        ref_totals=ref_role_totals
        character_dict=role_dict

        _, style_neg = apply_style_positive(add_style, " ") #get n
        neg_text = neg_text + style_neg

        replace_prompts=[i+pos_text for i in replace_prompts]

        only_role_list=[apply_style_positive(add_style,i)[0] for i in replace_prompts]


        if len(role_list)==1:
            role_key_list=role_list*len(only_role_list)
        else:
            role_key_list=replicate_data_by_indices(role_list, index_char_1_list, index_char_2_list)
            role_key_list=[i for i in role_key_list if i is not None]

        if len(role_list)>1: #重新整理prompt排序，便于后续emb和ID的对应
            nc_dual_list=[]
            for i in nc_indexs:
                nc_dual_list.append(i)
            for i in positions_index_dual:
                nc_dual_list.append(i)
            adjusted_a = adjust_indices(index_char_1_list, nc_dual_list)
            adjusted_b = adjust_indices(index_char_2_list, nc_dual_list)
            a_list = [only_role_list[i] for i in adjusted_a if 0 <= i < len(only_role_list)]
            b_list = [only_role_list[i] for i in adjusted_b if 0 <= i < len(only_role_list)]

            inf_list_split=[a_list,b_list]
        else:
            inf_list_split=[only_role_list]


        if not img2img_mode and infer_mode=="flux_pulid":
            raise "flux_pulid mode only support image2image"

        if infer_mode=="msdiffusion" and not prompts_dual:
            raise "use msdiffusion mode need  have [role1] and [role2] in sence txt."

        if img2img_mode and   photomake_ckpt_path is None and infer_mode=="story":
            raise "need chocie photomake v1 or v2 when use img2img mode"

        if infer_mode=="story_maker"  and image is None:
            raise "story_maker only support  img2img mode,you can use story_and_maker or link a iamge"

        if infer_mode=="msdiffusion" and photomake_ckpt_path  and  not img2img_mode:
            raise "if use msdiffusion txt2img mode,can not use photomake_ckpt_path"

        uno_pe="d" #Literal['d', 'h', 'w', 'o']

        only_role_emb_ne,x_1_refs_dict,image_emb,x_1_refs_dual=None,None,None,None

        input_id_images_dict={}
        image_list=[]

        if img2img_mode and image is not None :
             if len(role_list)==1:
                image_pil=tensortopil_list_upscale(image, width, height)
                input_id_images_dict[role_list[0]]=image_pil[0]
                image_list=[image_pil[0]]
             else:
                f1, _, _, _ = image.size()
                img_list = list(torch.chunk(image, chunks=f1))
                image_list = [nomarl_upscale(img, width, height) for img in img_list]
                for index, key in enumerate(role_list):
                    input_id_images_dict[key]=image_list[index]

        if img2img_mode:
            if infer_mode == "story_and_maker" or infer_mode == "story_maker":
                k1, _, _, _ = image.size()
                if k1 == 1:
                    image_emb = [CLIP_VISION.encode_image(image)["penultimate_hidden_states"]]
                else:
                    img_list = list(torch.chunk(image, chunks=k1))
                    image_emb=[]
                    for i in img_list:
                        image_emb.append(CLIP_VISION.encode_image(i)["penultimate_hidden_states"].to(device, dtype=unet_type))
            elif infer_mode=="story":
                image_emb = None  # story的图片模式的emb要从ip的模型里单独拿，
            elif infer_mode=="msdiffusion":
                image_emb = CLIP_VISION.encode_image(image)["penultimate_hidden_states"] #MS分情况，图生图直接在前面拿，文生图在在sample拿
                if not image_emb.is_cuda:#确保emb在cuda,以及type的正确
                    image_emb = image_emb.to(device,dtype=unet_type)
            elif infer_mode == "instant_character":
                from .model_loader_utils import load_dual_clip,instant_character_id_clip
                if extra_function is not None and extra_param:
                    if "sig" in extra_function and  "dino" in extra_param: #TODO
                        siglip_path_i=extra_function
                        dino_path_i=extra_param
                    else:
                        siglip_path_i=extra_param
                        dino_path_i=extra_function
                else:
                    dino_path_i="facebook/dinov2-giant"
                    siglip_path_i="google/siglip-so400m-patch14-384"
                siglip_image_encoder,siglip_image_processor,dino_image_encoder_2,dino_image_processor_2=load_dual_clip(siglip_path_i,dino_path_i,device,torch.bfloat16)
                image_emb=[]
                for img in tensortopil_list(image):
                    id_emb=instant_character_id_clip(img,siglip_image_encoder,siglip_image_processor,dino_image_encoder_2,dino_image_processor_2,device,torch.bfloat16)
                    image_emb.append(id_emb)
                siglip_image_encoder.to("cpu")
                dino_image_encoder_2.to("cpu")
                gc_cleanup()
            elif infer_mode == "flux_pulid":

                # get emb use insightface
                pass
            elif infer_mode == "bagel_edit":
                image_emb=input_id_images_dict
            elif infer_mode == "flux_omi":
                image_emb=input_id_images_dict
            elif infer_mode == "dreamo":
                from .model_loader_utils import Dreamo_image_encoder
                from huggingface_hub import hf_hub_download

                BEN2_path_list=[i for i in repo_list if 'BEN2_Base.pth' in i]
                if not BEN2_path_list:
                    BEN2_path= hf_hub_download(repo_id='PramaLLC/BEN2', filename='BEN2_Base.pth', local_dir='ComfyUI/models')
                else:
                    BEN2_path=BEN2_path_list[0]
                ref_list=tensortopil_list_upscale(image,width,height)
                if control_image is not None and dreamo_mode=="id":#如果是id加ip模式，角色可以多次换装，但是要考虑衣服的数量
                    control_pil_list=tensortopil_list_upscale(control_image,width,height)
                    if len(control_pil_list) != len(only_role_list):
                        raise "when use dreamo id + ip,control image must same size as prompts,dreamo的id模式,多少句角色prompt,就要有多少件衣服在control节点输入"
                    else:
                        if len(role_list)>1:
                                nc_dual_list=[]
                                for i in nc_indexs:
                                    nc_dual_list.append(i)
                                for i in positions_index_dual:
                                    nc_dual_list.append(i)
                                adjusted_a = adjust_indices(index_char_1_list, nc_dual_list)
                                adjusted_b = adjust_indices(index_char_2_list, nc_dual_list)
                                # print("adjusted_a",adjusted_a)
                                # print("adjusted_b",adjusted_b)
                                a_list = [control_pil_list[i] for i in adjusted_a if 0 <= i < len(control_pil_list)]
                                b_list = [control_pil_list[i] for i in adjusted_b if 0 <= i < len(control_pil_list)]

                                control_pil_list=[a_list,b_list]
                        else:
                            control_pil_list=[control_pil_list]
                else:
                    control_pil_list=None

                #task_list=["ip", "id", "style"] #TODO,
                image_emb={}
                if control_pil_list is not None and dreamo_mode=="id":
                    for key,role_img ,control_list in zip(role_list,ref_list,control_pil_list):
                        id_role_emb=[]
                        for i in control_list:
                            role_emb=Dreamo_image_encoder(BEN2_path,role_img,i,dreamo_mode,"ip",ref_res=512) #TODO
                            id_role_emb.append(role_emb)
                        image_emb[key]=id_role_emb
                else:
                    for key,role_img in zip(role_list,ref_list):
                        role_emb=Dreamo_image_encoder(BEN2_path,role_img,None,dreamo_mode,"ip",ref_res=512) #TODO
                        image_emb[key]=[role_emb]*len(only_role_list) #改成列表方便协同id模式

            elif infer_mode == "realcustom":
                if "g" in os.path.basename(clip1_path).lower():
                    clip1_path, clip2_path = clip2_path, clip1_path

                text_model,vision_model=load_clip_clipvsion([clip1_path,clip2_path],
                                                            [os.path.join(dir_path, "config/clip_1"),os.path.join(dir_path, "config/clip_2")],
                                                            dino_path,siglip_path,switch.get("vision_model_config_ar"))

            elif infer_mode == "uno":

                from .UNO.uno.flux.pipeline import preprocess_ref

                ref_pil_list=tensortopil_list(image) #原方法需要图片缩放，先转回pil
                if prompts_dual:
                    x_1_refs_dual_=[phi2narry(preprocess_ref(i, 320)) for i in ref_pil_list ]
                    x_1_refs_dual=[[vae.encode(i[:,:,:,:3]).to(torch.bfloat16) for i in x_1_refs_dual_]*len(prompts_dual)]#[[1,2],[1,2]] 不考虑cn

                if control_image is not None:
                    control_pil_list=tensortopil_list(control_image)
                else:
                    control_pil_list=None

                if control_pil_list is None:# 无控制图时,默认时单图模式,x_1_refs用key取值
                    x_1_refs_dict={}
                    for key ,role_pil,prompts in zip(role_list,ref_pil_list,inf_list_split):
                        role_tensor=phi2narry(preprocess_ref(role_pil, 512))#单图默认512
                        x_1_refs_dict[key]=[[vae.encode(role_tensor[:,:,:,:3]).to(torch.bfloat16)]*len(prompts)] # {key:[[1],[2],key2:[[3],[4]]}
                else:
                    if len(control_pil_list) != len(only_role_list):
                        raise "control image must same size as prompts,多少句话就多少张图"
                    else:
                        if len(role_list)>1:
                            nc_dual_list=[]
                            for i in nc_indexs:
                                nc_dual_list.append(i)
                            for i in positions_index_dual:
                                nc_dual_list.append(i)
                            adjusted_a = adjust_indices(index_char_1_list, nc_dual_list)
                            adjusted_b = adjust_indices(index_char_2_list, nc_dual_list)
                            # print("adjusted_a",adjusted_a)
                            # print("adjusted_b",adjusted_b)
                            a_list = [control_pil_list[i] for i in adjusted_a if 0 <= i < len(control_pil_list)]
                            b_list = [control_pil_list[i] for i in adjusted_b if 0 <= i < len(control_pil_list)]

                            control_pil_list=[a_list,b_list]
                        else:
                            control_pil_list=[control_pil_list]
                        x_1_refs_dict={}
                        for key ,role_pil,control_pil in zip(role_list,ref_pil_list,control_pil_list):
                            mix_list=[]
                            role_tensor=phi2narry(preprocess_ref(role_pil, 320) )#多图默认320
                            for c in (control_pil):
                                c_tensor=phi2narry(preprocess_ref(c, 320))
                                mix_list.append([vae.encode(role_tensor[:,:,:,:3]).to(torch.bfloat16),vae.encode(c_tensor[:,:,:,:3]).to(torch.bfloat16)])
                            x_1_refs_dict[key]=mix_list #{key:[[1,2],[3,4]]}


        # pre insight face model and emb
        if img2img_mode:
            if infer_mode in ["story_and_maker","story_maker","flux_pulid","kolor_face","infiniteyou"] or (use_photov2 and infer_mode=="story"):

                from .model_loader_utils import insight_face_loader,get_insight_dict


                find_mask=[i for i in repo_list if "rmgb" in i.lower()] if repo_list else None
                if find_mask is not None:
                    mask_repo=find_mask[0]
                else:
                    mask_repo="briaai/RMBG-1.4"
                app_face,pipeline_mask,app_face_=insight_face_loader(infer_mode,use_photov2, auraface,onnx_provider,mask_repo)
                image_list=tensortopil_list_upscale(image, 640, 640)

                input_id_emb_s_dict,input_id_img_s_dict,input_id_emb_un_dict,input_id_cloth_dict=get_insight_dict(app_face,app_face_,pipeline_mask,infer_mode,use_photov2,image_list,
                     role_list,control_image,width, height,model_,switch.get("image_proj_model")) # CHECK role_list

            else:
                input_id_emb_s_dict,input_id_img_s_dict,input_id_emb_un_dict,input_id_cloth_dict={}, {}, {}, {}

        else:
            input_id_emb_s_dict,input_id_img_s_dict,input_id_emb_un_dict,input_id_cloth_dict={}, {}, {}, {}


        # pre clip txt emb

        noise_x,inp_neg_list,letent_real=[],[],{}

        if  infer_mode=="consistory":
            only_role_emb=None
        elif infer_mode=="instant_character":
            from .model_loader_utils import cf_flux_prompt_clip
            only_role_emb={}
            for key ,prompts in zip(role_list,inf_list_split):
                emb_list_=[]
                for prompt in prompts:
                    p_,pool_,ind_=cf_flux_prompt_clip(clip,prompt)
                    emb_list_.append([p_,pool_,ind_])
                only_role_emb[key]=emb_list_

        elif infer_mode=="realcustom":
            from .model_loader_utils import realcustom_clip_emb
            samples_per_prompt=1
            guidance_weight=3.5
            role_text_list = role_text.splitlines()
            roel_text_c=''.join(role_text_list)

            if '(' in roel_text_c and ')' in roel_text_c:
                    object_prompt = extract_content_from_brackets_(roel_text_c)  # 提取prompt的object list
                    #print(f"object_prompt:{object_prompt}")
                    object_prompt=[i.strip() for i in object_prompt]
                    for i in object_prompt:
                        if " " in i:
                            raise "when using [object],object must be a word,any blank in it will cause error."

                    object_prompt=[i for i in object_prompt ]
                    target_phrases = sorted(list(set(object_prompt)),key=lambda x: list(object_prompt).index(x))  # 清除同名物体,保持原有顺序
                    #print(f"object_prompt:{phrases}",len(phrases))
                    assert  len(target_phrases)>=2,"when using msdiffusion ,object must be more than 2."
                    if len(target_phrases)>2:
                        target_phrases=target_phrases[:2] #只取前两个物体
            else:
                raise "when using realcustom ,(objectA)  and (objectA) must be in the role prompt."

            print(f"object_prompt:{target_phrases}")
            image_list=tensortopil_list_upscale(image, width, height)

            only_role_emb,letent_real={},{}

            for key ,prompts,role_image,target_phrase in zip(role_list,inf_list_split,image_list,target_phrases):
                emb_dict_real_list,latent_dict_list=[],[]
                for p,n in zip(prompts,[neg_text]*len(prompts)):

                    emb_dict_real,latent_dict=realcustom_clip_emb(text_model,vision_model,switch.get("vae_config"),switch.get("vae_downsample_factor"),p,n,role_image,target_phrase,
                                                                width,height,device,samples_per_prompt,guidance_weight)
                    emb_dict_real_list.append(emb_dict_real)
                    latent_dict_list.append(latent_dict)
                only_role_emb[key]=emb_dict_real_list
                letent_real[key]=latent_dict_list
            vision_model.to("cpu")
            gc_cleanup()
        elif infer_mode=="flux_pulid":
            from .PuLID.flux.util import load_clip, load_t5
            from .PuLID.app_flux  import get_emb_flux_pulid

            #repo_in="flux-dev" if not repo else repo
            if_repo =False
            t5_ = load_t5("flux-dev",clip,if_repo,device, max_length=128)
            clip_ = load_clip("flux-dev",clip,if_repo,device)
            only_role_emb,noise_x,inp_neg_list={},{},{}

            for key ,prompts in zip(role_list,inf_list_split):
                ip_emb,inp_n=[],[]

                for p,n in zip(prompts,[neg_text]*len(prompts)):
                    inp,inp_neg=get_emb_flux_pulid(t5_,clip_,if_repo,p,n,width,height,num_steps=20,guidance=3.5,device=device)
                    ip_emb.append(inp)
                    inp_n.append(inp_neg)
                only_role_emb[key]=ip_emb
                inp_neg_list[key]=inp_n
        elif infer_mode == "uno":
            only_role_emb={}
            from .UNO.uno.flux.sampling import prepare_multi_ip_wrapper

            for key ,prompts in zip(role_list,inf_list_split):
                ip_emb=[]
                for p,x_1 in zip(prompts,x_1_refs_dict[key]):
                    inp = prepare_multi_ip_wrapper(clip,prompt=p, ref_imgs=x_1, pe=uno_pe,device=device,h=height,w=width)
                    ip_emb.append(inp)
                only_role_emb[key]=ip_emb
        elif infer_mode=="kolor_face":
            from .model_loader_utils import glm_single_encode
            from .kolors.models.tokenization_chatglm import ChatGLMTokenizer

            tokenizer = ChatGLMTokenizer.from_pretrained(os.path.join(switch.get("find_Kolors"),'text_encoder'))
            assert clip is not None, "clip is None,check your clip path"
            chatglm3_model = {
                'text_encoder': clip,
                'tokenizer': tokenizer
                }
            only_role_emb,only_role_emb_ne=glm_single_encode(chatglm3_model, inf_list_split,role_list, neg_text, 1)
        elif infer_mode=="flux_omi" and  switch.get("no_dif_quantization"): #need comfyclip
            from .model_loader_utils import cf_flux_prompt_clip
            only_role_emb={}
            for key ,prompts in zip(role_list,inf_list_split):
                emb_list_=[]
                for prompt in prompts:
                    p_,pool_,ind_=cf_flux_prompt_clip(clip,prompt)
                    emb_list_.append([p_,pool_,ind_])
                only_role_emb[key]=emb_list_

        else:
            if photomake_ckpt_path is not None and img2img_mode and infer_mode in["story","story_and_maker","msdiffusion"]: #img2img模式下SDXL的story的clip要特殊处理，有2个imgencoder进程，所以分离出来 TODO
                if use_photov2:
                    if len(role_list)==1:
                        emb_dict=photomaker_clip_v2(clip,model_,only_role_list,neg_text,image_list,input_id_emb_s_dict[role_key_list[0]][0])
                        only_role_emb=[emb_dict]
                    else:

                        only_role_emb=[]
                        for role_list_s,role_list_id,key in zip(inf_list_split,image_list,role_key_list):
                            #print(input_id_emb_s_dict[key][0].shape) #torch.Size([512])
                            emb_dict=photomaker_clip_v2(clip,model_,role_list_s,neg_text,[role_list_id],input_id_emb_s_dict[key][0])
                            only_role_emb.append(emb_dict)

                else:
                    if len(role_list)==1:
                        emb_dict=photomaker_clip(clip,model_,only_role_list,neg_text,image_list)
                        only_role_emb=[emb_dict]
                    else:

                        only_role_emb=[]
                        for role_list_s,role_list_id in zip(inf_list_split,image_list):
                            emb_dict=photomaker_clip(clip,model_,role_list_s,neg_text,[role_list_id])
                            only_role_emb.append(emb_dict)

            else:
                if infer_mode=="classic": #TODO 逆序的角色会出现iD不匹配，受影响的有story文生图
                    only_role_emb= cf_clip([only_role_list], clip, infer_mode,role_list) #story模式需要拆分prompt，所以这里需要传入role_list
                elif infer_mode=="dreamo" or infer_mode=="bagel_edit" or (infer_mode=="flux_omi" and not switch.get("no_dif_quantization")):
                    pass # TODO 暂时不支持dreamo
                elif infer_mode=="qwen_image":
                    from .qwen_image.inferencer import get_emb_data
                    image_list_=tensortolist(image,width,height)
                    only_role_emb= get_emb_data(clip,vae,inf_list_split,image_list_,role_list)
                else:
                    only_role_emb= cf_clip(inf_list_split, clip, infer_mode,role_list)  #story,story_maker,story_and_maker,msdiffusion,infinite
                    if len (role_list)==1 :
                        only_role_emb_dict={}
                        only_role_emb_dict[role_list[0]]=only_role_emb # [ [cond_p, output_p],...]
                        only_role_emb=only_role_emb_dict
        # pre nc txt emb
        if nc_txt_list and not infer_mode=="consistory":
            nc_txt_list=[i+pos_text for i in nc_txt_list]
            if photomake_ckpt_path is not None and img2img_mode and infer_mode in["story","story_maker","story_and_maker","msdiffusion"]: #img2img模式下SDXL的story的clip要特殊处理，有2个imgencoder进程，所以分离出来 TODO
                nc_emb=[]
                for  i  in nc_txt_list:
                    if use_photov2:
                        empty_emb_zero = torch.zeros_like(input_id_emb_s_dict[role_list[0]][0]).to(device,dtype=torch.float16)
                        emb_dict_=photomaker_clip_v2(clip,model_,[i],neg_text,image_list,empty_emb_zero,nc_flag=True)
                    else:
                        emb_dict_=photomaker_clip(clip,model_,[i],neg_text,image_list,nc_flag=True)
                    nc_emb.append(emb_dict_)
            else:
                if infer_mode!="kolor_face":
                    nc_emb=cf_clip(nc_txt_list, clip, infer_mode,role_list,input_split=False)
                elif infer_mode=="dreamo" or infer_mode=="bagel_edit" or infer_mode=="flux_omi":
                    pass # TODO 暂时不支持dreamo

                else:
                    nc_emb,_= glm_single_encode(chatglm3_model, nc_txt_list,role_list, neg_text, 1,nc_mode=True)
        else:
            nc_emb=None
        # pre dual role txt emb
        grounding_kwargs=None
        cross_attention_kwargs=None
        if prompts_dual and infer_mode in["story_maker","story_and_maker","msdiffusion"] : #忽略不支持的模式

            if infer_mode=="msdiffusion": #[A] a (pig) play whith [B]  a (doll) in the garden
                prompts_dual=[i.replace(role_list[0] ,role_dict[role_list[0]]) for i in prompts_dual if role_list[0] in i ]
                prompts_dual = [i.replace(role_list[1], role_dict[role_list[1]]) for i in prompts_dual if role_list[1] in i]
                if '(' in prompts_dual[0] and ')' in prompts_dual[0]:
                    object_prompt = extract_content_from_brackets_(prompts_dual[0])  # 提取prompt的object list
                    #print(f"object_prompt:{object_prompt}")
                    object_prompt=[i.strip() for i in object_prompt]
                    for i in object_prompt:
                        if " " in i:
                            raise "when using [object],object must be a word,any blank in it will cause error."

                    object_prompt=[i for i in object_prompt ]
                    phrases = sorted(list(set(object_prompt)),key=lambda x: list(object_prompt).index(x))  # 清除同名物体,保持原有顺序
                    #print(f"object_prompt:{phrases}",len(phrases))
                    assert  len(phrases)>=2,"when using msdiffusion ,object must be more than 2."
                    if len(phrases)>2:
                        phrases=phrases[:2] #只取前两个物体
                else:
                    raise "when using msdiffusion ,(objectA)  and (objectA) must be in the prompt."
                if use_lora:
                    prompts_dual=[i+lora_trigger_words for i in prompts_dual]

                prompts_dual=[apply_style_positive(add_style,i+pos_text)[0] for i in prompts_dual] #[' T a (pig)  play whith  a (doll) in the garden,best 8k,RAW']


                prompts_dual=[i.replace("("," ").replace(")"," ") for i in prompts_dual] #clear the bracket

                box_add = []  # 获取预设box
                guidance_list = guidance_list.strip().split(";")
                for i in range(len(guidance_list)):
                    box_add.append(get_float(guidance_list[i]))

                if mask_threshold == 0:
                    mask_threshold = None
                if mask_threshold:
                    boxes = [box_add[:2]]  # boxes = [[[0., 0.25, 0.4, 0.75], [0.6, 0.25, 1., 0.75]]]  # man+women
                else:
                    boxes = [[[0, 0, 0, 0], [0, 0, 0, 0]]]  # used if you want no layout guidance
                print(f"Roles position on {boxes}")
                from transformers import CLIPTokenizer
                tokenizer_=CLIPTokenizer.from_pretrained(os.path.join(dir_path, "local_repo/tokenizer"))
                for i in prompts_dual:
                    phrase_idxes = [get_phrases_idx_cf(tokenizer_, phrases[0], i)]
                    eot_idxes = [[get_eot_idx_cf(tokenizer_, i)] * len(phrases[0])]
                    cross_attention_kwargs, grounding_kwargs = get_ms_phrase_emb(boxes, device, infer_type_g,
                                                                             [0], 1, phrase_idxes,
                                                                             1, eot_idxes, phrases, clip,tokenizer_)
                daul_emb = cf_clip(prompts_dual, clip, infer_mode,role_list,input_split=False)
            else:
                prompts_dual=[i.replace(role_list[0] ,role_dict[role_list[0]]) for i in prompts_dual if role_list[0] in i ]
                prompts_dual = [i.replace(role_list[1], role_dict[role_list[1]]) for i in prompts_dual if role_list[1] in i]
                if use_lora:
                    prompts_dual=[i+lora_trigger_words for i in prompts_dual]
                prompts_dual=[apply_style_positive(add_style,i+pos_text)[0] for i in prompts_dual] #[' The figurine  play whith  The pig in the garden,best 8k,RAW']
                daul_emb=cf_clip(prompts_dual, clip, infer_mode,role_list,input_split=False) # maker
        elif prompts_dual and infer_mode == "uno": #UNO双角色图片要单独处理
            prompts_dual=[i.replace(role_list[0] ,role_dict[role_list[0]]) for i in prompts_dual if role_list[0] in i ]
            prompts_dual = [i.replace(role_list[1], role_dict[role_list[1]]) for i in prompts_dual if role_list[1] in i]
            prompts_dual=[apply_style_positive(add_style,i+pos_text)[0] for i in prompts_dual] #[' The figurine  play whith  The pig in the garden,best 8k,RAW']

            daul_emb=[]
            for dual_t,x_1 in zip(prompts_dual,x_1_refs_dual): # dual_t:The figurine  play whith  The pig in the garden best 8k,RAW
                inp = prepare_multi_ip_wrapper(clip,prompt=dual_t, ref_imgs=x_1, pe=uno_pe,device=device,h=height,w=width)
                daul_emb.append(inp)
        elif prompts_dual and infer_mode == "dreamo":
            from .model_loader_utils import Dreamo_image_encoder
            from huggingface_hub import hf_hub_download
            BEN2_path_list=[i for i in repo_list if "BEN2_Base.pth" in i]
            if not BEN2_path_list:
                BEN2_path= hf_hub_download(repo_id='PramaLLC/BEN2', filename='BEN2_Base.pth', local_dir='ComfyUI/models')
            else:
                BEN2_path=BEN2_path_list[0]
            ref_list=tensortopil_list_upscale(image,width,height)
            #task_list=["ip", "id", "style"] #TODO
            images_emb=Dreamo_image_encoder(BEN2_path,ref_list[0],ref_list[1],"ip","ip",ref_res=512) #TODO
            prompts_dual=[i.replace(role_list[0] ,role_dict[role_list[0]]) for i in prompts_dual if role_list[0] in i ]
            prompts_dual = [i.replace(role_list[1], role_dict[role_list[1]]) for i in prompts_dual if role_list[1] in i]
            prompts_dual=[apply_style_positive(add_style,i+pos_text)[0] for i in prompts_dual] #[' The figurine  play whith  The pig in the garden,best 8k,RAW']
            daul_emb=[images_emb,prompts_dual]
        else:
            daul_emb=None
        # neg
        if infer_mode=="consistory":
            negative = None
            postive_dict={}
        elif infer_mode=="instant_character":
            postive_dict= {"role": only_role_emb, "nc": None, "daul": daul_emb}
            negative = None
        elif infer_mode=="dreamo":
            only_role_emb={}
            for key ,prompts in zip(role_list,inf_list_split):
                only_role_emb[key]=prompts
            postive_dict= {"role": only_role_emb, "nc": None, "daul": daul_emb}
            negative = neg_text # TODO
        elif infer_mode=="bagel_edit":
            only_role_emb={}
            for key ,prompts in zip(role_list,inf_list_split):
                only_role_emb[key]=prompts
            postive_dict= {"role": only_role_emb, "nc": None, "daul": None}
            negative = neg_text # TODO
        elif infer_mode=="flux_omi":
            if switch.get("no_dif_quantization"):
                postive_dict= {"role": only_role_emb, "nc": None, "daul": None} #only_role_emb:p_,pool_,ind_
                negative = neg_text # TODO
            else:
                only_role_emb={}
                for key,prompts in zip(role_list,inf_list_split):
                    only_role_emb[key]=prompts
                postive_dict= {"role": only_role_emb, "nc": None, "daul": None}
                negative = neg_text # TODO
        elif infer_mode=="realcustom":
            postive_dict= {"role": only_role_emb, "nc": None, "daul": daul_emb}
            negative=[letent_real]
        elif infer_mode=="flux_pulid":
            postive_dict= {"role": only_role_emb, "nc": None, "daul": daul_emb} #不支持NC
            negative = [inp_neg_list,noise_x]
        elif infer_mode=="uno":
            postive_dict= {"role": only_role_emb, "nc": None, "daul": daul_emb} #TODO
            negative=None
        elif infer_mode=="kolor_face":
            postive_dict = {"role": only_role_emb, "nc": nc_emb, "daul": daul_emb}
            negative = only_role_emb_ne[0]
        elif infer_mode=="qwen_image":
            postive_dict = {"role": only_role_emb, "nc": nc_emb, "daul": daul_emb}
            from .qwen_image.inferencer import get_emb_data

            image_list_=tensortolist(image, width, height) if not switch.get("edit_mode") else None
            neg_list=[[neg_text]] if len(role_list)==1 else [[neg_text],[neg_text]]
            negative= get_emb_data(clip,vae,neg_list,image_list_,role_list)

        else:
            tokens_n = clip.tokenize(neg_text)
            output_n = clip.encode_from_tokens(tokens_n, return_pooled=True, return_dict=True) #{"pooled_output":tensor}
            cond_n = output_n.pop("cond")
            if cond_n.shape[1] /77>1 and infer_mode != "classic":
                logging.warning("nagetive prompt'tokens length is abvoe 77,will split it.")
                cond_n=torch.chunk(cond_n,cond_n.shape[1] //77,dim=1)[0]

            if infer_mode == "classic":
                if nc_emb is not None:
                    for index,i in zip(nc_indexs,nc_emb):
                        only_role_emb.insert(index,i)
                if  daul_emb is not None:
                    for index,i in zip(positions_index_dual,daul_emb):
                        only_role_emb.insert(index,i)
                postive_dict = only_role_emb
                negative = [[cond_n, output_n]]
            else:
                postive_dict = {"role": only_role_emb, "nc": nc_emb, "daul": daul_emb}
                negative = [cond_n, output_n]


        # Pre emb for maker

        if img2img_mode and infer_mode in ["story_and_maker","story_maker"]:
            from .StoryMaker.pipeline_sdxl_storymaker_wrapper import encode_prompt_image_emb_
            num_images_per_prompt=1
            make_img,make_mask_img,make_face_info,make_cloth_info=[],[],[],[]

            for key in role_list:
                img_ = input_id_emb_un_dict[key][0]
                # print(character_key_str,input_id_images_dict)
                mask_image_ = input_id_img_s_dict[key][0] #mask_image
                face_info_ = input_id_emb_s_dict[key][0]
                cloth_info_ = None
                if isinstance(control_image, torch.Tensor):
                    cloth_info_ = input_id_cloth_dict[key][0]
                make_img.append(img_)
                make_mask_img.append(mask_image_)
                make_face_info.append(face_info_)
                make_cloth_info.append(cloth_info_)

            mask_image_2=None
            face_info_2=None
            cloth_2=None