
Commit 0e5d5e5: Fix formatting
Parent: 5ef2189

8 files changed: +89, -44 lines

src/main/java/org/beehive/gpullama3/model/loader/AbstractModelLoader.java

Lines changed: 2 additions & 4 deletions
@@ -152,12 +152,10 @@ protected GGMLTensorEntry getOutputWeight(Map<String, GGMLTensorEntry> tensorEnt
     /**
      * Create standard (CPU) weights.
      */
-    protected abstract Weights createStandardWeights(Map<String, GGMLTensorEntry> tensorEntries, C config, Pair<float[], float[]> ropeFreqs, GGMLTensorEntry tokenEmbeddings,
-            GGMLTensorEntry outputWeight);
+    protected abstract Weights createStandardWeights(Map<String, GGMLTensorEntry> tensorEntries, C config, Pair<float[], float[]> ropeFreqs, GGMLTensorEntry tokenEmbeddings, GGMLTensorEntry outputWeight);
 
     /**
      * Create TornadoVM (GPU) weights.
      */
-    protected abstract Weights createTornadoVMWeights(Map<String, GGMLTensorEntry> tensorEntries, C config, Pair<float[], float[]> ropeFreqs, GGMLTensorEntry tokenEmbeddings,
-            GGMLTensorEntry outputWeight);
+    protected abstract Weights createTornadoVMWeights(Map<String, GGMLTensorEntry> tensorEntries, C config, Pair<float[], float[]> ropeFreqs, GGMLTensorEntry tokenEmbeddings, GGMLTensorEntry outputWeight);
 }
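
For orientation, these two hooks sit at the tail of AbstractModelLoader's template-method flow. The orchestration itself is outside this diff, so the following is only a hypothetical sketch of how the pieces plug together; the method name load, the vocabulary parameter, and the "token_embd.weight" key are assumptions, not code from this repository:

    // Hypothetical sketch only -- not part of this commit.
    public Model load(Map<String, GGMLTensorEntry> tensorEntries, Map<String, Object> metadata, Vocabulary vocabulary, boolean useTornadoVM) {
        C config = createConfiguration(metadata);                                 // per-architecture hyperparameters
        Pair<float[], float[]> ropeFreqs = precomputeRopeFrequencies(config);     // cos/sin tables for RoPE
        GGMLTensorEntry tokenEmbeddings = tensorEntries.get("token_embd.weight"); // assumed GGUF tensor name
        GGMLTensorEntry outputWeight = getOutputWeight(tensorEntries);
        Weights weights = useTornadoVM
                ? createTornadoVMWeights(tensorEntries, config, ropeFreqs, tokenEmbeddings, outputWeight)
                : createStandardWeights(tensorEntries, config, ropeFreqs, tokenEmbeddings, outputWeight);
        Tokenizer tokenizer = createTokenizer(metadata, vocabulary);
        return createModel(config, tokenizer, weights);
    }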

src/main/java/org/beehive/gpullama3/model/loader/LlamaModelLoader.java

Lines changed: 7 additions & 2 deletions
@@ -42,6 +42,7 @@ protected Tokenizer createTokenizer(Map<String, Object> metadata, Vocabulary voc
         return new LlamaTokenizer(metadata, vocabulary);
     }
 
+    // @formatter:off
     @Override
     protected LlamaConfiguration createConfiguration(Map<String, Object> metadata) {
         int vocabSize = metadata.containsKey("llama.vocab_size") ? (int) metadata.get("llama.vocab_size") : (int) metadata.get("tokenizer.ggml.tokens.length");
@@ -59,18 +60,19 @@ protected LlamaConfiguration createConfiguration(Map<String, Object> metadata) {
                 (float) metadata.getOrDefault("llama.attention.layer_norm_rms_epsilon", 1e-5f),
                 (float) metadata.getOrDefault("llama.rope.freq_base", 10000f)).withContextLength(contextLength);
     }
+    // @formatter:on
 
     @Override
     protected Pair<float[], float[]> precomputeRopeFrequencies(LlamaConfiguration config) {
-        return RoPE.precomputeFreqsCis(config.contextLength(), config.dim() / config.numberOfHeads(), config.ropeTheta(), false, 1.0f, 1.0f, 1.0f, config.contextLength()
-        );
+        return RoPE.precomputeFreqsCis(config.contextLength(), config.dim() / config.numberOfHeads(), config.ropeTheta(), false, 1.0f, 1.0f, 1.0f, config.contextLength());
     }
 
     @Override
     protected Llama createModel(LlamaConfiguration config, Tokenizer tokenizer, Weights weights) {
         return new Llama(config, tokenizer, weights, ChatFormat.create(tokenizer, null));
     }
 
+    // @formatter:off
     @Override
     protected Weights createStandardWeights(Map<String, GGMLTensorEntry> tensorEntries, LlamaConfiguration config, Pair<float[], float[]> ropeFreqs, GGMLTensorEntry tokenEmbeddings,
             GGMLTensorEntry outputWeight) {
@@ -94,7 +96,9 @@ protected Weights createStandardWeights(Map<String, GGMLTensorEntri
                 loadTensor(outputWeight),
                 outputWeight.ggmlType());
     }
+    // @formatter:on
 
+    // @formatter:off
     @Override
     protected Weights createTornadoVMWeights(Map<String, GGMLTensorEntry> tensorEntries,
             LlamaConfiguration config,
@@ -133,4 +137,5 @@ protected Weights createTornadoVMWeights(Map<String, GGMLTensorEntr
                 ggmlType
         );
     }
+    // @formatter:on
 }
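
The // @formatter:off and // @formatter:on pairs added throughout this commit are formatter markers honored by IntelliJ IDEA (and Eclipse): the IDE skips reformatting for any code between them, which preserves the hand-wrapped argument lists above. Note the markers only take effect when formatter-control comments are enabled in the IDE's code-style settings. A minimal illustration:

    // @formatter:off
    int[][] identity = {
            { 1, 0, 0 },
            { 0, 1, 0 },
            { 0, 0, 1 },
    };
    // @formatter:on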

src/main/java/org/beehive/gpullama3/model/loader/MistralModelLoader.java

Lines changed: 32 additions & 11 deletions
@@ -40,6 +40,7 @@ protected Tokenizer createTokenizer(Map<String, Object> metadata, Vocabulary voc
         return new MistralTokenizer(metadata, vocabulary);
     }
 
+    // @formatter:off
     @Override
     protected MistralConfiguration createConfiguration(Map<String, Object> metadata) {
         int modelContextLength = (int) metadata.get("llama.context_length");
@@ -48,29 +49,47 @@ protected MistralConfiguration createConfiguration(Map<String, Object> metadata)
         // Get vocabulary size from metadata
         int vocabSize = metadata.containsKey("llama.vocab_size") ? (int) metadata.get("llama.vocab_size") : (int) metadata.get("tokenizer.ggml.tokens.length");
 
-        return new MistralConfiguration((int) metadata.get("llama.embedding_length"), (int) metadata.get("llama.feed_forward_length"), (int) metadata.get("llama.block_count"),
+        return new MistralConfiguration(
+                (int) metadata.get("llama.embedding_length"),
+                (int) metadata.get("llama.feed_forward_length"),
+                (int) metadata.get("llama.block_count"),
                 (int) metadata.get("llama.attention.head_count"),
-
-                metadata.containsKey("llama.attention.head_count_kv") ? (int) metadata.get("llama.attention.head_count_kv") : (int) metadata.get("llama.attention.head_count"),
-
-                vocabSize, finalContextLength, false, (float) metadata.getOrDefault("llama.attention.layer_norm_rms_epsilon", 1e-5f),
-                (float) metadata.getOrDefault("llama.rope.freq_base", 10000f));
+                metadata.containsKey("llama.attention.head_count_kv") ?
+                        (int) metadata.get("llama.attention.head_count_kv")
+                        : (int) metadata.get("llama.attention.head_count"),
+                vocabSize,
+                finalContextLength,
+                false,
+                (float) metadata.getOrDefault("llama.attention.layer_norm_rms_epsilon", 1e-5f),
+                (float) metadata.getOrDefault("llama.rope.freq_base", 10000f)
+        );
     }
+    // @formatter:on
 
+    // @formatter:off
     @Override
     protected Pair<float[], float[]> precomputeRopeFrequencies(MistralConfiguration config) {
-        return RoPE.precomputeFreqsCis(config.contextLength(), config.dim() / config.numberOfHeads(), config.ropeTheta(), false, 1.0f, 1.0f, 1.0f, config.contextLength()
+        return RoPE.precomputeFreqsCis(
+                config.contextLength(),
+                config.dim() / config.numberOfHeads(),
+                config.ropeTheta(),
+                false,
+                1.0f,
+                1.0f,
+                1.0f,
+                config.contextLength()
         );
     }
+    // @formatter:on
 
     @Override
     protected Mistral createModel(MistralConfiguration config, Tokenizer tokenizer, Weights weights) {
         return new Mistral(config, tokenizer, weights, ChatFormat.create(tokenizer, null));
     }
 
+    // @formatter:off
     @Override
-    protected Weights createStandardWeights(Map<String, GGMLTensorEntry> tensorEntries, MistralConfiguration config, Pair<float[], float[]> ropeFreqs, GGMLTensorEntry tokenEmbeddings,
-            GGMLTensorEntry outputWeight) {
+    protected Weights createStandardWeights(Map<String, GGMLTensorEntry> tensorEntries, MistralConfiguration config, Pair<float[], float[]> ropeFreqs, GGMLTensorEntry tokenEmbeddings, GGMLTensorEntry outputWeight) {
 
         final int nl = config.numberOfLayers();
 
@@ -91,10 +110,11 @@ protected Weights createStandardWeights(Map<String, GGMLTensorEntri
                 loadTensor(outputWeight),
                 outputWeight.ggmlType());
     }
+    // @formatter:off
 
+    // @formatter:off
     @Override
-    protected Weights createTornadoVMWeights(Map<String, GGMLTensorEntry> tensorEntries, MistralConfiguration config, Pair<float[], float[]> ropeFreqs, GGMLTensorEntry tokenEmbeddings,
-            GGMLTensorEntry outputWeight) {
+    protected Weights createTornadoVMWeights(Map<String, GGMLTensorEntry> tensorEntries, MistralConfiguration config, Pair<float[], float[]> ropeFreqs, GGMLTensorEntry tokenEmbeddings, GGMLTensorEntry outputWeight) {
         GGMLType ggmlType = outputWeight.ggmlType();
 
         if (TornadoVMMasterPlan.ENABLE_TORNADOVM_INIT_TIME) {
@@ -127,4 +147,5 @@ protected Weights createTornadoVMWeights(Map<String, GGMLTensorEntr
                 ggmlType
         );
     }
+    // @formatter:on
 }
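
RoPE.precomputeFreqsCis appears in every loader above with the signature (contextLength, headSize, theta, neoxStyle, plus four scaling parameters). The RoPE class itself is not part of this diff, so the following is only a sketch of what a standard RoPE precomputation conventionally produces: per-position, per-frequency cosine and sine tables. The body below ignores the scaling parameters and assumes the project's Pair type wraps the two arrays; both are assumptions for illustration:

    // Sketch of standard (unscaled) RoPE table precomputation -- an assumption,
    // not this repository's actual implementation.
    static Pair<float[], float[]> precomputeFreqsCis(int contextLength, int headSize, float theta) {
        int half = headSize / 2;
        float[] real = new float[contextLength * half]; // cos table (freq_cis_real)
        float[] imag = new float[contextLength * half]; // sin table (freq_cis_imag)
        for (int pos = 0; pos < contextLength; pos++) {
            for (int i = 0; i < half; i++) {
                double freq = 1.0 / Math.pow(theta, (2.0 * i) / headSize); // theta^(-2i/headSize)
                real[pos * half + i] = (float) Math.cos(pos * freq);
                imag[pos * half + i] = (float) Math.sin(pos * freq);
            }
        }
        return new Pair<>(real, imag);
    }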

src/main/java/org/beehive/gpullama3/model/loader/ModelLoader.java

Lines changed: 3 additions & 5 deletions
@@ -61,17 +61,16 @@ private static ModelType detectModelType(Map<String, Object> metadata) {
             } else if (lowerName.contains("phi3") || lowerName.contains("phi-3")) {
                 return ModelType.PHI_3;
             }
-
         }
 
         return ModelType.UNKNOWN;
     }
 
     /**
      * Loads the language model based on the given options.
-     * <p>
-     * If Ahead-of-Time (AOT) mode is enabled, attempts to use a pre-loaded compiled model. Otherwise, loads the model from the specified path using the model loader.
-     * </p>
+     *
+     * <p>If Ahead-of-Time (AOT) mode is enabled, attempts to use a pre-loaded compiled model.
+     * Otherwise, loads the model from the specified path using the model loader.
      *
      * @param options the parsed CLI options containing model path and max token limit
      * @return the loaded {@link Model} instance
@@ -279,5 +278,4 @@ public static FloatBuffer toFloatBuffer(GGMLTensorEntry tensorEntry) {
             default -> throw new UnsupportedOperationException("Conversion to " + ggmlType);
         };
     }
-
 }
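
The detectModelType fragment above shows only the Phi-3 branch; the rest of the method is elided by the diff. A minimal sketch of the detection pattern, assuming the model name is read from the GGUF general.name metadata key (the actual key used here is not shown):

    // Sketch of the name-based detection pattern; "general.name" is an assumption.
    private static ModelType detectModelType(Map<String, Object> metadata) {
        String name = (String) metadata.get("general.name");
        if (name != null) {
            String lowerName = name.toLowerCase();
            if (lowerName.contains("phi3") || lowerName.contains("phi-3")) {
                return ModelType.PHI_3;
            }
            // ... branches for the other architectures elided here ...
        }
        return ModelType.UNKNOWN;
    }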

src/main/java/org/beehive/gpullama3/model/loader/Phi3ModelLoader.java

Lines changed: 25 additions & 14 deletions
@@ -46,6 +46,7 @@ protected Tokenizer createTokenizer(Map<String, Object> metadata, Vocabulary voc
         return new Phi3Tokenizer(metadata, vocabulary);
     }
 
+    // @formatter:off
     @Override
     protected Phi3Configuration createConfiguration(Map<String, Object> metadata) {
         final String modelPrefix = "phi3.";
@@ -67,18 +68,26 @@ protected Phi3Configuration createConfiguration(Map<String, Object> metadata) {
         );
         return config;
     }
+    // @formatter:off
 
+    // @formatter:off
     @Override
     protected Pair<float[], float[]> precomputeRopeFrequencies(Phi3Configuration config) {
         // Calculate head size from dim and numberOfHeads
         int headSize = config.dim() / config.numberOfHeads();
 
-        return RoPE.precomputeFreqsCis(modelContextLength, // Use model context length for RoPE precomputation
-                headSize, // Calculated head size
-                config.ropeTheta(), false, // Phi3 uses standard RoPE, not neox-style based on reference
-                8, 1, 3, 8192 // Additional RoPE parameters from reference
+        return RoPE.precomputeFreqsCis(
+                modelContextLength, // Use model context length for RoPE precomputation
+                headSize, // Calculated head size
+                config.ropeTheta(),
+                false, // Phi3 uses standard RoPE, not neox-style based on reference
+                8,
+                1,
+                3,
+                8192 // Additional RoPE parameters from reference
         );
     }
+    // @formatter:off
 
     @Override
     protected Phi3 createModel(Phi3Configuration config, Tokenizer tokenizer, Weights weights) {
@@ -88,33 +97,34 @@ protected Phi3 createModel(Phi3Configuration config, Tokenizer tokenizer, Weight
         return new Phi3(config, tokenizer, weights, ChatFormat.create(tokenizer, chatTokens));
     }
 
+    // @formatter:off
     @Override
-    protected Weights createStandardWeights(Map<String, GGMLTensorEntry> tensorEntries, Phi3Configuration config, Pair<float[], float[]> ropeFreqs, GGMLTensorEntry tokenEmbeddings,
-            GGMLTensorEntry outputWeight) {
+    protected Weights createStandardWeights(Map<String, GGMLTensorEntry> tensorEntries, Phi3Configuration config, Pair<float[], float[]> ropeFreqs, GGMLTensorEntry tokenEmbeddings, GGMLTensorEntry outputWeight) {
         float[] ropeFreqsReal = ropeFreqs.first();
         float[] ropeFreqsImag = ropeFreqs.second();
 
         final int nl = config.numberOfLayers();
 
         return new Phi3StandardWeights(
-                loadTensor(tokenEmbeddings), // token_embedding_table
+                loadTensor(tokenEmbeddings),                                                        // token_embedding_table
                 loadArrayOfTensors(nl, i -> tensorEntries.get("blk." + i + ".attn_norm.weight")),   // rms_att_weight (as FloatTensor[])
                 loadArrayOfTensors(nl, i -> tensorEntries.get("blk." + i + ".attn_qkv.weight")),    // wqkv (combined)
                 loadArrayOfTensors(nl, i -> tensorEntries.get("blk." + i + ".attn_output.weight")), // wo
                 loadArrayOfTensors(nl, i -> tensorEntries.get("blk." + i + ".ffn_norm.weight")),    // rms_ffn_weight (as FloatTensor[])
                 loadArrayOfTensors(nl, i -> tensorEntries.get("blk." + i + ".ffn_down.weight")),    // wDown
                 loadArrayOfTensors(nl, i -> tensorEntries.get("blk." + i + ".ffn_up.weight")),      // wUp (separate, not combined)
-                loadTensor(tensorEntries.get("output_norm.weight")), // rms_final_weight (as FloatTensor)
-                new ArrayFloatTensor(ropeFreqsReal), // freq_cis_real
-                new ArrayFloatTensor(ropeFreqsImag), // freq_cis_imag
-                loadTensor(outputWeight), // wcls
-                outputWeight.ggmlType() // weightType
+                loadTensor(tensorEntries.get("output_norm.weight")),                                // rms_final_weight (as FloatTensor)
+                new ArrayFloatTensor(ropeFreqsReal),                                                // freq_cis_real
+                new ArrayFloatTensor(ropeFreqsImag),                                                // freq_cis_imag
+                loadTensor(outputWeight),                                                           // wcls
+                outputWeight.ggmlType()                                                             // weightType
         );
     }
+    // @formatter:on
 
+    // @formatter:off
     @Override
-    protected Weights createTornadoVMWeights(Map<String, GGMLTensorEntry> tensorEntries, Phi3Configuration config, Pair<float[], float[]> ropeFreqs, GGMLTensorEntry tokenEmbeddings,
-            GGMLTensorEntry outputWeight) {
+    protected Weights createTornadoVMWeights(Map<String, GGMLTensorEntry> tensorEntries, Phi3Configuration config, Pair<float[], float[]> ropeFreqs, GGMLTensorEntry tokenEmbeddings, GGMLTensorEntry outputWeight) {
        GGMLType ggmlType = outputWeight.ggmlType();
 
         if (TornadoVMMasterPlan.ENABLE_TORNADOVM_INIT_TIME) {
@@ -144,4 +154,5 @@ protected Weights createTornadoVMWeights(Map<String, GGMLTensorEntr
                 ggmlType
         );
     }
+    // @formatter:on
 }
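
All of these loaders read hyperparameters from the GGUF metadata map with two idioms: a direct cast for required keys, which fails fast when the key is missing, and getOrDefault for optional ones. A small self-contained sketch of the pattern; the specific phi3.* field names are assumptions based on GGUF's <architecture>.<field> key convention, with only modelPrefix taken from the diff above:

    import java.util.Map;

    final class MetadataLookupSketch {
        static void readConfig(Map<String, Object> metadata) {
            final String modelPrefix = "phi3.";
            // Required: unboxing the cast throws a NullPointerException if the key is absent.
            int dim = (int) metadata.get(modelPrefix + "embedding_length");
            int layers = (int) metadata.get(modelPrefix + "block_count");
            // Optional: falls back to a conventional default.
            float rmsNormEps = (float) metadata.getOrDefault(modelPrefix + "attention.layer_norm_rms_epsilon", 1e-5f);
            System.out.printf("dim=%d layers=%d eps=%g%n", dim, layers, rmsNormEps);
        }
    }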

src/main/java/org/beehive/gpullama3/model/loader/Qwen2ModelLoader.java

Lines changed: 8 additions & 0 deletions
@@ -42,6 +42,7 @@ protected Tokenizer createTokenizer(Map<String, Object> metadata, Vocabulary voc
         return new Qwen3Tokenizer(metadata, vocabulary, isDeepSeekR1DistillQwen);
     }
 
+    // @formatter:off
     @Override
     protected Qwen2Configuration createConfiguration(Map<String, Object> metadata) {
         int modelContextLength = (int) metadata.get("qwen2.context_length");
@@ -68,12 +69,14 @@ protected Qwen2Configuration createConfiguration(Map<String, Object> metadata) {
                 (float) metadata.get("qwen2.rope.freq_base")
         );
     }
+    // @formatter:on
 
     @Override
     protected Pair<float[], float[]> precomputeRopeFrequencies(Qwen2Configuration config) {
         return RoPE.precomputeFreqsCis(config.contextLengthModel(), config.headSize(), config.ropeTheta(), false, 8, 1, 3, 8192);
     }
 
+    // @formatter:off
     @Override
     protected Qwen2 createModel(Qwen2Configuration config, Tokenizer tokenizer, Weights weights) {
         Map<String, Object> metadata = gguf.getMetadata();
@@ -83,7 +86,9 @@ protected Qwen2 createModel(Qwen2Configuration config, Tokenizer tokenizer, Weig
                 : new ChatTokens("<|im_start|>", "<|im_end|>", "", "<|end_of_text|>", "<|endoftext|>");
         return new Qwen2(config, tokenizer, weights, ChatFormat.create(tokenizer, chatTokens));
     }
+    // @formatter:on
 
+    // @formatter:off
     @Override
     protected Weights createStandardWeights(Map<String, GGMLTensorEntry> tensorEntries, Qwen2Configuration config, Pair<float[], float[]> ropeFreqs, GGMLTensorEntry tokenEmbeddings,
             GGMLTensorEntry outputWeight) {
@@ -111,7 +116,9 @@ protected Weights createStandardWeights(Map<String, GGMLTensorEntri
                 outputWeight.ggmlType()
         );
     }
+    // @formatter:on
 
+    // @formatter:off
     @Override
     protected Weights createTornadoVMWeights(Map<String, GGMLTensorEntry> tensorEntries, Qwen2Configuration config, Pair<float[], float[]> ropeFreqs, GGMLTensorEntry tokenEmbeddings,
             GGMLTensorEntry outputWeight) {
@@ -152,4 +159,5 @@ protected Weights createTornadoVMWeights(Map<String, GGMLTensorEntr
         );
 
     }
+    // @formatter:off
 }

src/main/java/org/beehive/gpullama3/model/loader/Qwen3ModelLoader.java

Lines changed: 8 additions & 0 deletions
@@ -43,6 +43,7 @@ protected Tokenizer createTokenizer(Map<String, Object> metadata, Vocabulary voc
         return new Qwen3Tokenizer(metadata, vocabulary, isDeepSeekR1DistillQwen);
     }
 
+    // @formatter:off
     @Override
     protected Qwen3Configuration createConfiguration(Map<String, Object> metadata) {
         int modelContextLength = (int) metadata.get("qwen3.context_length");
@@ -70,12 +71,14 @@ protected Qwen3Configuration createConfiguration(Map<String, Object> metadata) {
                 (float) metadata.get("qwen3.rope.freq_base")
         );
     }
+    // @formatter:on
 
     @Override
     protected Pair<float[], float[]> precomputeRopeFrequencies(Qwen3Configuration config) {
         return RoPE.precomputeFreqsCis(config.contextLengthModel(), config.numberOfHeadsKey(), config.ropeTheta(), false, 0, 0, 0, 0);
     }
 
+    // @formatter:off
     @Override
     protected Qwen3 createModel(Qwen3Configuration config, Tokenizer tokenizer, Weights weights) {
         Map<String, Object> metadata = gguf.getMetadata();
@@ -85,7 +88,9 @@ protected Qwen3 createModel(Qwen3Configuration config, Tokenizer tokenizer, Weig
                 : new ChatTokens("<|im_start|>", "<|im_end|>", "", "<|end_of_text|>", "<|endoftext|>");
         return new Qwen3(config, tokenizer, weights, ChatFormat.create(tokenizer, chatTokens));
     }
+    // @formatter:off
 
+    // @formatter:off
     @Override
     protected Weights createStandardWeights(Map<String, GGMLTensorEntry> tensorEntries, Qwen3Configuration config, Pair<float[], float[]> ropeFreqs, GGMLTensorEntry tokenEmbeddings,
             GGMLTensorEntry outputWeight) {
@@ -116,7 +121,9 @@ protected Weights createStandardWeights(Map<String, GGMLTensorEntri
                 null
         );
     }
+    // @formatter:on
 
+    // @formatter:off
     @Override
     protected Weights createTornadoVMWeights(Map<String, GGMLTensorEntry> tensorEntries, Qwen3Configuration config,
             Pair<float[], float[]> ropeFreqs, GGMLTensorEntry tokenEmbeddings,
@@ -151,4 +158,5 @@ protected Weights createTornadoVMWeights(Map<String, GGMLTensorEntr
         );
 
     }
+    // @formatter:on
 }
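
Both Qwen loaders share the chat-token selection shown above: DeepSeek-R1-Distill-Qwen variants reuse the Qwen3Tokenizer but need different chat tokens. The DeepSeek branch of the ternary is elided by these hunks, so this sketch leaves it as a parameter rather than inventing its token strings; only the else-branch literals come from the diff:

    // Sketch of the selection pattern; deepSeekTokens stands in for the branch
    // not shown in this diff.
    static ChatTokens selectChatTokens(boolean isDeepSeekR1DistillQwen, ChatTokens deepSeekTokens) {
        return isDeepSeekR1DistillQwen
                ? deepSeekTokens
                : new ChatTokens("<|im_start|>", "<|im_end|>", "", "<|end_of_text|>", "<|endoftext|>");
    }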
