11#ifndef __FLUX_HPP__
22#define __FLUX_HPP__
33
4+ #include < memory>
45#include < vector>
56
67#include " ggml_extend.hpp"
@@ -18,7 +19,7 @@ namespace Flux {
1819 blocks[" out_layer" ] = std::shared_ptr<GGMLBlock>(new Linear (hidden_dim, hidden_dim, true ));
1920 }
2021
21- struct ggml_tensor * forward (struct ggml_context * ctx, struct ggml_tensor * x) {
22+ struct ggml_tensor * forward (struct ggml_context * ctx, struct ggml_tensor * x) override {
2223 // x: [..., in_dim]
2324 // return: [..., hidden_dim]
2425 auto in_layer = std::dynamic_pointer_cast<Linear>(blocks[" in_layer" ]);
@@ -36,7 +37,7 @@ namespace Flux {
3637 int64_t hidden_size;
3738 float eps;
3839
// Creates this block's parameter tensor: a 1-D tensor of `hidden_size`
// elements allocated in `ctx` and stored in `params`.
// The weight type is fixed to GGML_TYPE_F32; `tensor_types` and `prefix` are
// not read anywhere in this body.
// Diff note: the only change between the removed and added signature lines is
// the `override` specifier.
39- void init_params (struct ggml_context * ctx, const String2GGMLType& tensor_types = {}, const std::string prefix = " " ) {
40+ void init_params (struct ggml_context * ctx, const String2GGMLType& tensor_types = {}, const std::string prefix = " " ) override {
4041 ggml_type wtype = GGML_TYPE_F32;
4142 params[" scale" ] = ggml_new_tensor_1d (ctx, wtype, hidden_size);
4243 }
@@ -47,7 +48,7 @@ namespace Flux {
4748 : hidden_size(hidden_size),
4849 eps (eps) {}
4950
50- struct ggml_tensor * forward (struct ggml_context * ctx, struct ggml_tensor * x) {
51+ struct ggml_tensor * forward (struct ggml_context * ctx, struct ggml_tensor * x) override {
5152 struct ggml_tensor * w = params[" scale" ];
5253 x = ggml_rms_norm (ctx, x, eps);
5354 x = ggml_mul (ctx, x, w);
@@ -136,11 +137,11 @@ namespace Flux {
136137 };
137138
138139 struct ModulationOut {
139- ggml_tensor* shift = NULL ;
140- ggml_tensor* scale = NULL ;
141- ggml_tensor* gate = NULL ;
140+ ggml_tensor* shift = nullptr ;
141+ ggml_tensor* scale = nullptr ;
142+ ggml_tensor* gate = nullptr ;
142143
143- ModulationOut (ggml_tensor* shift = NULL , ggml_tensor* scale = NULL , ggml_tensor* gate = NULL )
144+ ModulationOut (ggml_tensor* shift = nullptr , ggml_tensor* scale = nullptr , ggml_tensor* gate = nullptr )
144145 : shift(shift), scale(scale), gate(gate) {}
145146
146147 ModulationOut (struct ggml_context * ctx, ggml_tensor* vec, int64_t offset) {
@@ -259,7 +260,7 @@ namespace Flux {
259260 struct ggml_tensor * txt,
260261 struct ggml_tensor * vec,
261262 struct ggml_tensor * pe,
262- struct ggml_tensor * mask = NULL ) {
263+ struct ggml_tensor * mask = nullptr ) {
263264 // img: [N, n_img_token, hidden_size]
264265 // txt: [N, n_txt_token, hidden_size]
265266 // pe: [n_img_token + n_txt_token, d_head/2, 2, 2]
@@ -398,15 +399,15 @@ namespace Flux {
398399
// Builds a ModulationOut from `vec`, starting at offset `3 * idx`.
// `idx` is declared outside this view; presumably this block's index. TODO confirm.
// Diff note: the explicit `ModulationOut(...)` constructor call is replaced by
// a braced return; both forms pass the same three arguments (ctx, vec, offset).
399400 ModulationOut get_distil_mod (struct ggml_context * ctx, struct ggml_tensor * vec) {
400401 int64_t offset = 3 * idx;
401- return ModulationOut ( ctx, vec, offset) ;
402+ return { ctx, vec, offset} ;
402403 }
403404
404405 struct ggml_tensor * forward (struct ggml_context * ctx,
405406 ggml_backend_t backend,
406407 struct ggml_tensor * x,
407408 struct ggml_tensor * vec,
408409 struct ggml_tensor * pe,
409- struct ggml_tensor * mask = NULL ) {
410+ struct ggml_tensor * mask = nullptr ) {
410411 // x: [N, n_token, hidden_size]
411412 // pe: [n_token, d_head/2, 2, 2]
412413 // return: [N, n_token, hidden_size]
@@ -485,7 +486,7 @@ namespace Flux {
485486 auto shift = ggml_view_2d (ctx, vec, vec->ne [0 ], vec->ne [1 ], vec->nb [1 ], stride * (offset + 0 )); // [N, dim]
486487 auto scale = ggml_view_2d (ctx, vec, vec->ne [0 ], vec->ne [1 ], vec->nb [1 ], stride * (offset + 1 )); // [N, dim]
487488 // No gate
488- return ModulationOut ( shift, scale, NULL ) ;
489+ return { shift, scale, nullptr } ;
489490 }
490491
491492 struct ggml_tensor * forward (struct ggml_context * ctx,
@@ -664,15 +665,15 @@ namespace Flux {
664665 struct ggml_tensor * y,
665666 struct ggml_tensor * guidance,
666667 struct ggml_tensor * pe,
667- struct ggml_tensor * mod_index_arange = NULL ,
668+ struct ggml_tensor * mod_index_arange = nullptr ,
668669 std::vector<int > skip_layers = {}) {
669670 auto img_in = std::dynamic_pointer_cast<Linear>(blocks[" img_in" ]);
670671 auto txt_in = std::dynamic_pointer_cast<Linear>(blocks[" txt_in" ]);
671672 auto final_layer = std::dynamic_pointer_cast<LastLayer>(blocks[" final_layer" ]);
672673
673674 img = img_in->forward (ctx, img);
674675 struct ggml_tensor * vec;
675- struct ggml_tensor * txt_img_mask = NULL ;
676+ struct ggml_tensor * txt_img_mask = nullptr ;
676677 if (params.is_chroma ) {
677678 int64_t mod_index_length = 344 ;
678679 auto approx = std::dynamic_pointer_cast<ChromaApproximator>(blocks[" distilled_guidance_layer" ]);
@@ -681,7 +682,7 @@ namespace Flux {
681682
682683 // auto mod_index_arange = ggml_arange(ctx, 0, (float)mod_index_length, 1);
683684 // ggml_arange is not working on a lot of backends, precomputing it on CPU instead
684- GGML_ASSERT (arange != NULL );
685+ GGML_ASSERT (arange != nullptr );
685686 auto modulation_index = ggml_nn_timestep_embedding (ctx, mod_index_arange, 32 , 10000 , 1000 .f ); // [1, 344, 32]
686687
687688 // Batch broadcast (will it ever be useful)
@@ -695,15 +696,15 @@ namespace Flux {
695696 vec = ggml_cont (ctx, ggml_permute (ctx, vec, 0 , 2 , 1 , 3 )); // [344, N, 64]
696697 vec = approx->forward (ctx, vec); // [344, N, hidden_size]
697698
698- if (y != NULL ) {
699- txt_img_mask = sd_pad (ctx, y, img->ne [1 ], 0 , 0 , 0 );
699+ if (y != nullptr ) {
700+ txt_img_mask = ggml_pad (ctx, y, img->ne [1 ], 0 , 0 , 0 );
700701 }
701702 } else {
702703 auto time_in = std::dynamic_pointer_cast<MLPEmbedder>(blocks[" time_in" ]);
703704 auto vector_in = std::dynamic_pointer_cast<MLPEmbedder>(blocks[" vector_in" ]);
704705 vec = time_in->forward (ctx, ggml_nn_timestep_embedding (ctx, timesteps, 256 , 10000 , 1000 .f ));
705706 if (params.guidance_embed ) {
706- GGML_ASSERT (guidance != NULL );
707+ GGML_ASSERT (guidance != nullptr );
707708 auto guidance_in = std::dynamic_pointer_cast<MLPEmbedder>(blocks[" guidance_in" ]);
708709 // bf16 and fp16 result is different
709710 auto g_in = ggml_nn_timestep_embedding (ctx, guidance, 256 , 10000 , 1000 .f );
@@ -759,7 +760,7 @@ namespace Flux {
759760 int64_t patch_size = 2 ;
760761 int pad_h = (patch_size - H % patch_size) % patch_size;
761762 int pad_w = (patch_size - W % patch_size) % patch_size;
762- x = sd_pad (ctx, x, pad_w, pad_h, 0 , 0 ); // [N, C, H + pad_h, W + pad_w]
763+ x = ggml_pad (ctx, x, pad_w, pad_h, 0 , 0 ); // [N, C, H + pad_h, W + pad_w]
763764
764765 // img = rearrange(x, "b c (h ph) (w pw) -> b (h w) (c ph pw)", ph=patch_size, pw=patch_size)
765766 auto img = patchify (ctx, x, patch_size); // [N, h*w, C * patch_size * patch_size]
@@ -775,14 +776,14 @@ namespace Flux {
775776 struct ggml_tensor * y,
776777 struct ggml_tensor * guidance,
777778 struct ggml_tensor * pe,
778- struct ggml_tensor * mod_index_arange = NULL ,
779+ struct ggml_tensor * mod_index_arange = nullptr ,
779780 std::vector<ggml_tensor*> ref_latents = {},
780781 std::vector<int > skip_layers = {}) {
781782 // Forward pass of DiT.
782783 // x: (N, C, H, W) tensor of spatial inputs (images or latent representations of images)
783784 // timestep: (N,) tensor of diffusion timesteps
784785 // context: (N, L, D)
785- // c_concat: NULL , or for (N,C+M, H, W) for Fill
785+ // c_concat: nullptr, or (N, C+M, H, W) for Fill
786787 // y: (N, adm_in_channels) tensor of class labels
787788 // guidance: (N,)
788789 // pe: (L, d_head/2, 2, 2)
@@ -801,7 +802,7 @@ namespace Flux {
801802 uint64_t img_tokens = img->ne [1 ];
802803
803804 if (params.version == VERSION_FLUX_FILL) {
804- GGML_ASSERT (c_concat != NULL );
805+ GGML_ASSERT (c_concat != nullptr );
805806 ggml_tensor* masked = ggml_view_4d (ctx, c_concat, c_concat->ne [0 ], c_concat->ne [1 ], C, 1 , c_concat->nb [1 ], c_concat->nb [2 ], c_concat->nb [3 ], 0 );
806807 ggml_tensor* mask = ggml_view_4d (ctx, c_concat, c_concat->ne [0 ], c_concat->ne [1 ], 8 * 8 , 1 , c_concat->nb [1 ], c_concat->nb [2 ], c_concat->nb [3 ], c_concat->nb [2 ] * C);
807808
@@ -810,24 +811,24 @@ namespace Flux {
810811
811812 img = ggml_concat (ctx, img, ggml_concat (ctx, masked, mask, 0 ), 0 );
812813 } else if (params.version == VERSION_FLEX_2) {
813- GGML_ASSERT (c_concat != NULL );
814+ GGML_ASSERT (c_concat != nullptr );
814815 ggml_tensor* masked = ggml_view_4d (ctx, c_concat, c_concat->ne [0 ], c_concat->ne [1 ], C, 1 , c_concat->nb [1 ], c_concat->nb [2 ], c_concat->nb [3 ], 0 );
815816 ggml_tensor* mask = ggml_view_4d (ctx, c_concat, c_concat->ne [0 ], c_concat->ne [1 ], 1 , 1 , c_concat->nb [1 ], c_concat->nb [2 ], c_concat->nb [3 ], c_concat->nb [2 ] * C);
816817 ggml_tensor* control = ggml_view_4d (ctx, c_concat, c_concat->ne [0 ], c_concat->ne [1 ], C, 1 , c_concat->nb [1 ], c_concat->nb [2 ], c_concat->nb [3 ], c_concat->nb [2 ] * (C + 1 ));
817818
818- masked = sd_pad (ctx, masked, pad_w, pad_h, 0 , 0 );
819- mask = sd_pad (ctx, mask, pad_w, pad_h, 0 , 0 );
820- control = sd_pad (ctx, control, pad_w, pad_h, 0 , 0 );
819+ masked = ggml_pad (ctx, masked, pad_w, pad_h, 0 , 0 );
820+ mask = ggml_pad (ctx, mask, pad_w, pad_h, 0 , 0 );
821+ control = ggml_pad (ctx, control, pad_w, pad_h, 0 , 0 );
821822
822823 masked = patchify (ctx, masked, patch_size);
823824 mask = patchify (ctx, mask, patch_size);
824825 control = patchify (ctx, control, patch_size);
825826
826827 img = ggml_concat (ctx, img, ggml_concat (ctx, ggml_concat (ctx, masked, mask, 0 ), control, 0 ), 0 );
827828 } else if (params.version == VERSION_FLUX_CONTROLS) {
828- GGML_ASSERT (c_concat != NULL );
829+ GGML_ASSERT (c_concat != nullptr );
829830
830- ggml_tensor* control = sd_pad (ctx, c_concat, pad_w, pad_h, 0 , 0 );
831+ ggml_tensor* control = ggml_pad (ctx, c_concat, pad_w, pad_h, 0 , 0 );
831832 control = patchify (ctx, control, patch_size);
832833 img = ggml_concat (ctx, img, control, 0 );
833834 }
@@ -924,7 +925,7 @@ namespace Flux {
924925 flux.init (params_ctx, tensor_types, prefix);
925926 }
926927
// Returns a fixed description string.
// Diff note: the only change between the removed and added signature lines is
// the `override` specifier.
927- std::string get_desc () {
928+ std::string get_desc () override {
928929 return " flux" ;
929930 }
930931
@@ -944,18 +945,18 @@ namespace Flux {
944945 GGML_ASSERT (x->ne [3 ] == 1 );
945946 struct ggml_cgraph * gf = ggml_new_graph_custom (compute_ctx, FLUX_GRAPH_SIZE, false );
946947
947- struct ggml_tensor * mod_index_arange = NULL ;
948+ struct ggml_tensor * mod_index_arange = nullptr ;
948949
949950 x = to_backend (x);
950951 context = to_backend (context);
951- if (c_concat != NULL ) {
952+ if (c_concat != nullptr ) {
952953 c_concat = to_backend (c_concat);
953954 }
954955 if (flux_params.is_chroma ) {
955956 guidance = ggml_set_f32 (guidance, 0 );
956957
957958 if (!use_mask) {
958- y = NULL ;
959+ y = nullptr ;
959960 }
960961
961962 // ggml_arange is not working on some backends, precompute it
@@ -987,7 +988,7 @@ namespace Flux {
987988 auto pe = ggml_new_tensor_4d (compute_ctx, GGML_TYPE_F32, 2 , 2 , flux_params.axes_dim_sum / 2 , pos_len);
988989 // pe->data = pe_vec.data();
989990 // print_ggml_tensor(pe);
990- // pe->data = NULL ;
991+ // pe->data = nullptr ;
991992 set_backend_tensor_data (pe, pe_vec.data ());
992993
993994 struct ggml_tensor * out = flux.forward (compute_ctx,
@@ -1017,8 +1018,8 @@ namespace Flux {
10171018 struct ggml_tensor * guidance,
10181019 std::vector<ggml_tensor*> ref_latents = {},
10191020 bool increase_ref_index = false ,
1020- struct ggml_tensor ** output = NULL ,
1021- struct ggml_context * output_ctx = NULL ,
1021+ struct ggml_tensor ** output = nullptr ,
1022+ struct ggml_context * output_ctx = nullptr ,
10221023 std::vector<int > skip_layers = std::vector<int >()) {
10231024 // x: [N, in_channels, h, w]
10241025 // timesteps: [N, ]
@@ -1035,11 +1036,11 @@ namespace Flux {
10351036 void test () {
10361037 struct ggml_init_params params;
10371038 params.mem_size = static_cast <size_t >(20 * 1024 * 1024 ); // 20 MB
1038- params.mem_buffer = NULL ;
1039+ params.mem_buffer = nullptr ;
10391040 params.no_alloc = false ;
10401041
10411042 struct ggml_context * work_ctx = ggml_init (params);
1042- GGML_ASSERT (work_ctx != NULL );
1043+ GGML_ASSERT (work_ctx != nullptr );
10431044
10441045 {
10451046 // cpu f16:
@@ -1063,10 +1064,10 @@ namespace Flux {
10631064 ggml_set_f32 (y, 0 .01f );
10641065 // print_ggml_tensor(y);
10651066
1066- struct ggml_tensor * out = NULL ;
1067+ struct ggml_tensor * out = nullptr ;
10671068
10681069 int t0 = ggml_time_ms ();
1069- compute (8 , x, timesteps, context, NULL , y, guidance, {}, false , &out, work_ctx);
1070+ compute (8 , x, timesteps, context, nullptr , y, guidance, {}, false , &out, work_ctx);
10701071 int t1 = ggml_time_ms ();
10711072
10721073 print_ggml_tensor (out);
@@ -1078,7 +1079,7 @@ namespace Flux {
10781079 // ggml_backend_t backend = ggml_backend_cuda_init(0);
10791080 ggml_backend_t backend = ggml_backend_cpu_init ();
10801081 ggml_type model_data_type = GGML_TYPE_Q8_0;
1081- std::shared_ptr<FluxRunner> flux = std::shared_ptr <FluxRunner>(new FluxRunner ( backend, false ) );
1082+ std::shared_ptr<FluxRunner> flux = std::make_shared <FluxRunner>(backend, false );
10821083 {
10831084 LOG_INFO (" loading from '%s'" , file_path.c_str ());
10841085
0 commit comments