Skip to content
This repository was archived by the owner on Jul 4, 2025. It is now read-only.

Commit fa3d5b5

Browse files
chore: refactor ggml.h
1 parent 23d7d21 commit fa3d5b5

File tree

4 files changed

+192
-142
lines changed

4 files changed

+192
-142
lines changed

engine/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -169,6 +169,7 @@ file(APPEND "${CMAKE_CURRENT_BINARY_DIR}/cortex_openapi.h"
169169

170170
add_executable(${TARGET_NAME} main.cc
171171
${CMAKE_CURRENT_SOURCE_DIR}/utils/cpuid/cpu_info.cc
172+
${CMAKE_CURRENT_SOURCE_DIR}/utils/hardware/gguf/ggml.cc
172173
${CMAKE_CURRENT_SOURCE_DIR}/utils/file_logger.cc
173174

174175
${CMAKE_CURRENT_SOURCE_DIR}/extensions/template_renderer.cc

engine/cli/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,7 @@ find_package(lfreist-hwinfo CONFIG REQUIRED)
7474

7575
add_executable(${TARGET_NAME} main.cc
7676
${CMAKE_CURRENT_SOURCE_DIR}/../utils/cpuid/cpu_info.cc
77+
${CMAKE_CURRENT_SOURCE_DIR}/../utils/hardware/gguf/ggml.cc
7778
${CMAKE_CURRENT_SOURCE_DIR}/../utils/normalize_engine.cc
7879
${CMAKE_CURRENT_SOURCE_DIR}/../utils/file_logger.cc
7980
${CMAKE_CURRENT_SOURCE_DIR}/../utils/dylib_path_manager.cc

engine/utils/hardware/gguf/ggml.cc

Lines changed: 184 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,184 @@
1+
#include "ggml.h"
2+
3+
namespace hardware {
4+
5+
// Returns the bit width this module associates with one value of tensor type
// `gt`. Quantized formats yield fractional figures.
//
// Types without an explicit case below (for example GGML_TYPE_COUNT) yield
// 8.0f.
float GetQuantBit(GGMLType gt) {
  switch (gt) {
    case GGML_TYPE_I64:
    case GGML_TYPE_F64:
      return 64.0f;

    case GGML_TYPE_I32:
    case GGML_TYPE_F32:
      return 32.0f;

    case GGML_TYPE_I16:
    case GGML_TYPE_BF16:
    case GGML_TYPE_F16:
      return 16.0f;

    case GGML_TYPE_I8:
    case GGML_TYPE_Q8_0:
    case GGML_TYPE_Q8_1:
    case GGML_TYPE_Q8_K:
      return 8.0f;

    case GGML_TYPE_Q6_K:
      return 6.5625f;

    case GGML_TYPE_Q5_0:
    case GGML_TYPE_Q5_1:
    case GGML_TYPE_Q5_K:
      return 5.5f;

    case GGML_TYPE_Q4_0_4_4:
    case GGML_TYPE_Q4_0_4_8:
    case GGML_TYPE_Q4_0_8_8:
    case GGML_TYPE_IQ4_NL:
    case GGML_TYPE_IQ4_XS:
    case GGML_TYPE_Q4_0:
    case GGML_TYPE_Q4_1:
    case GGML_TYPE_Q4_K:
      return 4.5f;

    case GGML_TYPE_IQ3_XXS:
    case GGML_TYPE_IQ3_S:
    case GGML_TYPE_Q3_K:
      return 3.4375f;

    case GGML_TYPE_Q2_K:
      return 2.5625f;

    case GGML_TYPE_IQ2_S:
    case GGML_TYPE_IQ2_XXS:
    case GGML_TYPE_IQ2_XS:
      return 2.31f;

    // Sub-2-bit and ternary formats. Without explicit cases these would be
    // reported as 8 bits by the default branch. The figures are
    // type_size * 8 / block_size taken from the kGGMLTypeTraits entries,
    // assuming type_size is bytes per block and block_size is values per
    // block as in upstream ggml -- TODO confirm.
    case GGML_TYPE_TQ2_0:
      return 2.0625f;  // 66 * 8 / 256
    case GGML_TYPE_IQ1_M:
      return 1.75f;  // 56 * 8 / 256
    case GGML_TYPE_TQ1_0:
      return 1.6875f;  // 54 * 8 / 256
    case GGML_TYPE_IQ1_S:
      return 1.5625f;  // 50 * 8 / 256

    default:
      return 8.0f;
  }
}
51+
52+
// Returns the short textual name of `t`: the enumerator spelled without its
// GGML_TYPE_ prefix. Any value that has no case below yields "Invalid".
std::string to_string(GGMLType t) {
  const char* label = "Invalid";

  switch (t) {
    case GGML_TYPE_F32:      label = "F32";      break;
    case GGML_TYPE_F16:      label = "F16";      break;
    case GGML_TYPE_Q4_0:     label = "Q4_0";     break;
    case GGML_TYPE_Q4_1:     label = "Q4_1";     break;
    case GGML_TYPE_Q5_0:     label = "Q5_0";     break;
    case GGML_TYPE_Q5_1:     label = "Q5_1";     break;
    case GGML_TYPE_Q8_0:     label = "Q8_0";     break;
    case GGML_TYPE_Q8_1:     label = "Q8_1";     break;
    case GGML_TYPE_Q2_K:     label = "Q2_K";     break;
    case GGML_TYPE_Q3_K:     label = "Q3_K";     break;
    case GGML_TYPE_Q4_K:     label = "Q4_K";     break;
    case GGML_TYPE_Q5_K:     label = "Q5_K";     break;
    case GGML_TYPE_Q6_K:     label = "Q6_K";     break;
    case GGML_TYPE_Q8_K:     label = "Q8_K";     break;
    case GGML_TYPE_IQ2_XXS:  label = "IQ2_XXS";  break;
    case GGML_TYPE_IQ2_XS:   label = "IQ2_XS";   break;
    case GGML_TYPE_IQ3_XXS:  label = "IQ3_XXS";  break;
    case GGML_TYPE_IQ1_S:    label = "IQ1_S";    break;
    case GGML_TYPE_IQ4_NL:   label = "IQ4_NL";   break;
    case GGML_TYPE_IQ3_S:    label = "IQ3_S";    break;
    case GGML_TYPE_IQ2_S:    label = "IQ2_S";    break;
    case GGML_TYPE_IQ4_XS:   label = "IQ4_XS";   break;
    case GGML_TYPE_I8:       label = "I8";       break;
    case GGML_TYPE_I16:      label = "I16";      break;
    case GGML_TYPE_I32:      label = "I32";      break;
    case GGML_TYPE_I64:      label = "I64";      break;
    case GGML_TYPE_F64:      label = "F64";      break;
    case GGML_TYPE_IQ1_M:    label = "IQ1_M";    break;
    case GGML_TYPE_BF16:     label = "BF16";     break;
    case GGML_TYPE_Q4_0_4_4: label = "Q4_0_4_4"; break;
    case GGML_TYPE_Q4_0_4_8: label = "Q4_0_4_8"; break;
    case GGML_TYPE_Q4_0_8_8: label = "Q4_0_8_8"; break;
    case GGML_TYPE_TQ1_0:    label = "TQ1_0";    break;
    case GGML_TYPE_TQ2_0:    label = "TQ2_0";    break;
    default:                 break;  // keep "Invalid"
  }

  return label;
}
126+
127+
const std::unordered_map<GGMLType, GGMLTypeTrait> kGGMLTypeTraits = {
128+
{GGML_TYPE_F32, {.block_size = 1, .type_size = 4}},
129+
{GGML_TYPE_F16, {.block_size = 1, .type_size = 2}},
130+
{GGML_TYPE_Q4_0, {.block_size = 32, .type_size = 18, .is_quantized = true}},
131+
{GGML_TYPE_Q4_1, {.block_size = 32, .type_size = 20, .is_quantized = true}},
132+
{GGML_TYPE_Q5_0, {.block_size = 32, .type_size = 22, .is_quantized = true}},
133+
{GGML_TYPE_Q5_1, {.block_size = 32, .type_size = 24, .is_quantized = true}},
134+
{GGML_TYPE_Q8_0, {.block_size = 32, .type_size = 34, .is_quantized = true}},
135+
{GGML_TYPE_Q8_1, {.block_size = 32, .type_size = 36, .is_quantized = true}},
136+
{GGML_TYPE_Q2_K,
137+
{.block_size = 256, .type_size = 84, .is_quantized = true}},
138+
{GGML_TYPE_Q3_K,
139+
{.block_size = 256, .type_size = 110, .is_quantized = true}},
140+
{GGML_TYPE_Q4_K,
141+
{.block_size = 256, .type_size = 144, .is_quantized = true}},
142+
{GGML_TYPE_Q5_K,
143+
{.block_size = 256, .type_size = 176, .is_quantized = true}},
144+
{GGML_TYPE_Q6_K,
145+
{.block_size = 256, .type_size = 210, .is_quantized = true}},
146+
{GGML_TYPE_Q8_K,
147+
{.block_size = 256, .type_size = 292, .is_quantized = true}},
148+
{GGML_TYPE_IQ2_XXS,
149+
{.block_size = 256, .type_size = 66, .is_quantized = true}},
150+
{GGML_TYPE_IQ2_XS,
151+
{.block_size = 256, .type_size = 74, .is_quantized = true}},
152+
{GGML_TYPE_IQ3_XXS,
153+
{.block_size = 256, .type_size = 98, .is_quantized = true}},
154+
{GGML_TYPE_IQ1_S,
155+
{.block_size = 256, .type_size = 50, .is_quantized = true}},
156+
{GGML_TYPE_IQ4_NL,
157+
{.block_size = 32, .type_size = 18, .is_quantized = true}},
158+
{GGML_TYPE_IQ3_S,
159+
{.block_size = 256, .type_size = 110, .is_quantized = true}},
160+
{GGML_TYPE_IQ2_S,
161+
{.block_size = 256, .type_size = 82, .is_quantized = true}},
162+
{GGML_TYPE_IQ4_XS,
163+
{.block_size = 256, .type_size = 136, .is_quantized = true}},
164+
{GGML_TYPE_I8, {.block_size = 1, .type_size = 1}},
165+
{GGML_TYPE_I16, {.block_size = 1, .type_size = 2}},
166+
{GGML_TYPE_I32, {.block_size = 1, .type_size = 4}},
167+
{GGML_TYPE_I64, {.block_size = 1, .type_size = 8}},
168+
{GGML_TYPE_F64, {.block_size = 1, .type_size = 8}},
169+
{GGML_TYPE_IQ1_M,
170+
{.block_size = 256, .type_size = 56, .is_quantized = true}},
171+
{GGML_TYPE_BF16, {.block_size = 1, .type_size = 2}},
172+
{GGML_TYPE_Q4_0_4_4,
173+
{.block_size = 32, .type_size = 18, .is_quantized = true}},
174+
{GGML_TYPE_Q4_0_4_8,
175+
{.block_size = 32, .type_size = 18, .is_quantized = true}},
176+
{GGML_TYPE_Q4_0_8_8,
177+
{.block_size = 32, .type_size = 18, .is_quantized = true}},
178+
{GGML_TYPE_TQ1_0,
179+
{.block_size = 256, .type_size = 54, .is_quantized = true}},
180+
{GGML_TYPE_TQ2_0,
181+
{.block_size = 256, .type_size = 66, .is_quantized = true}},
182+
};
183+
184+
} // namespace hardware

engine/utils/hardware/gguf/ggml.h

Lines changed: 6 additions & 142 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
#include <unordered_map>
55

66
namespace hardware {
7+
78
enum GGMLType {
89
GGML_TYPE_F32 = 0,
910
GGML_TYPE_F16 = 1,
@@ -44,152 +45,15 @@ enum GGMLType {
4445
GGML_TYPE_COUNT,
4546
};
4647

47-
// Returns the bit width this module associates with one value of tensor type
// `gt`. Quantized formats yield fractional figures.
//
// Types without an explicit case below (for example GGML_TYPE_COUNT) yield
// 8.0f.
inline float GetQuantBit(GGMLType gt) {
  switch (gt) {
    case GGML_TYPE_I64:
    case GGML_TYPE_F64:
      return 64.0f;

    case GGML_TYPE_I32:
    case GGML_TYPE_F32:
      return 32.0f;

    case GGML_TYPE_I16:
    case GGML_TYPE_BF16:
    case GGML_TYPE_F16:
      return 16.0f;

    case GGML_TYPE_I8:
    case GGML_TYPE_Q8_0:
    case GGML_TYPE_Q8_1:
    case GGML_TYPE_Q8_K:
      return 8.0f;

    case GGML_TYPE_Q6_K:
      return 6.5625f;

    case GGML_TYPE_Q5_0:
    case GGML_TYPE_Q5_1:
    case GGML_TYPE_Q5_K:
      return 5.5f;

    case GGML_TYPE_Q4_0_4_4:
    case GGML_TYPE_Q4_0_4_8:
    case GGML_TYPE_Q4_0_8_8:
    case GGML_TYPE_IQ4_NL:
    case GGML_TYPE_IQ4_XS:
    case GGML_TYPE_Q4_0:
    case GGML_TYPE_Q4_1:
    case GGML_TYPE_Q4_K:
      return 4.5f;

    case GGML_TYPE_IQ3_XXS:
    case GGML_TYPE_IQ3_S:
    case GGML_TYPE_Q3_K:
      return 3.4375f;

    case GGML_TYPE_Q2_K:
      return 2.5625f;

    case GGML_TYPE_IQ2_S:
    case GGML_TYPE_IQ2_XXS:
    case GGML_TYPE_IQ2_XS:
      return 2.31f;

    // Sub-2-bit and ternary formats. Without explicit cases these would be
    // reported as 8 bits by the default branch. The figures are
    // type_size * 8 / block_size taken from the kGGMLTypeTraits entries,
    // assuming type_size is bytes per block and block_size is values per
    // block as in upstream ggml -- TODO confirm.
    case GGML_TYPE_TQ2_0:
      return 2.0625f;  // 66 * 8 / 256
    case GGML_TYPE_IQ1_M:
      return 1.75f;  // 56 * 8 / 256
    case GGML_TYPE_TQ1_0:
      return 1.6875f;  // 54 * 8 / 256
    case GGML_TYPE_IQ1_S:
      return 1.5625f;  // 50 * 8 / 256

    default:
      return 8.0f;
  }
}
93-
94-
// Maps `t` to its short name (the enumerator without the GGML_TYPE_ prefix).
// Values with no matching case produce "Invalid".
inline std::string to_string(GGMLType t) {
  const char* name = nullptr;

  switch (t) {
    // Floating point.
    case GGML_TYPE_F32:      name = "F32";      break;
    case GGML_TYPE_F16:      name = "F16";      break;
    case GGML_TYPE_BF16:     name = "BF16";     break;
    case GGML_TYPE_F64:      name = "F64";      break;

    // Plain integers.
    case GGML_TYPE_I8:       name = "I8";       break;
    case GGML_TYPE_I16:      name = "I16";      break;
    case GGML_TYPE_I32:      name = "I32";      break;
    case GGML_TYPE_I64:      name = "I64";      break;

    // Q*_0 / Q*_1 family.
    case GGML_TYPE_Q4_0:     name = "Q4_0";     break;
    case GGML_TYPE_Q4_1:     name = "Q4_1";     break;
    case GGML_TYPE_Q5_0:     name = "Q5_0";     break;
    case GGML_TYPE_Q5_1:     name = "Q5_1";     break;
    case GGML_TYPE_Q8_0:     name = "Q8_0";     break;
    case GGML_TYPE_Q8_1:     name = "Q8_1";     break;
    case GGML_TYPE_Q4_0_4_4: name = "Q4_0_4_4"; break;
    case GGML_TYPE_Q4_0_4_8: name = "Q4_0_4_8"; break;
    case GGML_TYPE_Q4_0_8_8: name = "Q4_0_8_8"; break;

    // Q*_K family.
    case GGML_TYPE_Q2_K:     name = "Q2_K";     break;
    case GGML_TYPE_Q3_K:     name = "Q3_K";     break;
    case GGML_TYPE_Q4_K:     name = "Q4_K";     break;
    case GGML_TYPE_Q5_K:     name = "Q5_K";     break;
    case GGML_TYPE_Q6_K:     name = "Q6_K";     break;
    case GGML_TYPE_Q8_K:     name = "Q8_K";     break;

    // IQ* family.
    case GGML_TYPE_IQ1_S:    name = "IQ1_S";    break;
    case GGML_TYPE_IQ1_M:    name = "IQ1_M";    break;
    case GGML_TYPE_IQ2_XXS:  name = "IQ2_XXS";  break;
    case GGML_TYPE_IQ2_XS:   name = "IQ2_XS";   break;
    case GGML_TYPE_IQ2_S:    name = "IQ2_S";    break;
    case GGML_TYPE_IQ3_XXS:  name = "IQ3_XXS";  break;
    case GGML_TYPE_IQ3_S:    name = "IQ3_S";    break;
    case GGML_TYPE_IQ4_NL:   name = "IQ4_NL";   break;
    case GGML_TYPE_IQ4_XS:   name = "IQ4_XS";   break;

    // TQ* family.
    case GGML_TYPE_TQ1_0:    name = "TQ1_0";    break;
    case GGML_TYPE_TQ2_0:    name = "TQ2_0";    break;

    default:
      break;
  }

  if (name == nullptr) {
    return "Invalid";
  }
  return name;
}
168-
16948
// Value type of the kGGMLTypeTraits lookup table: one record per GGMLType.
// The struct stays an aggregate so it can be brace-initialized positionally.
struct GGMLTypeTrait {
  // NOTE(review): presumably the number of values packed into one block, as
  // in upstream ggml -- confirm.
  uint64_t block_size;
  // NOTE(review): presumably the size of one block in bytes -- confirm.
  uint64_t type_size;
  // Defaults to false when an initializer omits it.
  bool is_quantized{false};
};
17453

175-
const std::unordered_map<GGMLType, GGMLTypeTrait> kGGMLTypeTraits = {
176-
{GGML_TYPE_F32, {1, 4, false}}, {GGML_TYPE_F16, {1, 2, false}},
177-
{GGML_TYPE_Q4_0, {32, 18, true}}, {GGML_TYPE_Q4_1, {32, 20, true}},
178-
{GGML_TYPE_Q5_0, {32, 22, true}}, {GGML_TYPE_Q5_1, {32, 24, true}},
179-
{GGML_TYPE_Q8_0, {32, 34, true}}, {GGML_TYPE_Q8_1, {32, 36, true}},
180-
{GGML_TYPE_Q2_K, {256, 84, true}}, {GGML_TYPE_Q3_K, {256, 110, true}},
181-
{GGML_TYPE_Q4_K, {256, 144, true}}, {GGML_TYPE_Q5_K, {256, 176, true}},
182-
{GGML_TYPE_Q6_K, {256, 210, true}}, {GGML_TYPE_Q8_K, {256, 292, true}},
183-
{GGML_TYPE_IQ2_XXS, {256, 66, true}}, {GGML_TYPE_IQ2_XS, {256, 74, true}},
184-
{GGML_TYPE_IQ3_XXS, {256, 98, true}}, {GGML_TYPE_IQ1_S, {256, 50, true}},
185-
{GGML_TYPE_IQ4_NL, {32, 18, true}}, {GGML_TYPE_IQ3_S, {256, 110, true}},
186-
{GGML_TYPE_IQ2_S, {256, 82, true}}, {GGML_TYPE_IQ4_XS, {256, 136, true}},
187-
{GGML_TYPE_I8, {1, 1, false}}, {GGML_TYPE_I16, {1, 2, false}},
188-
{GGML_TYPE_I32, {1, 4, false}}, {GGML_TYPE_I64, {1, 8, false}},
189-
{GGML_TYPE_F64, {1, 8, false}}, {GGML_TYPE_IQ1_M, {256, 56, true}},
190-
{GGML_TYPE_BF16, {1, 2, false}}, {GGML_TYPE_Q4_0_4_4, {32, 18, true}},
191-
{GGML_TYPE_Q4_0_4_8, {32, 18, true}}, {GGML_TYPE_Q4_0_8_8, {32, 18, true}},
192-
{GGML_TYPE_TQ1_0, {256, 54, true}}, {GGML_TYPE_TQ2_0, {256, 66, true}},
193-
};
54+
// Lookup table from GGMLType to its GGMLTypeTrait record. Declared here only;
// the single definition lives in ggml.cc.
extern const std::unordered_map<GGMLType, GGMLTypeTrait> kGGMLTypeTraits;
55+
56+
// Returns the bit width associated with `gt` as a float; types without an
// explicit case in the implementation yield 8.0f. Defined in ggml.cc.
float GetQuantBit(GGMLType gt);
57+
// Returns the short name of `t` (for example "Q4_K"), or "Invalid" for values
// the implementation does not recognize. Defined in ggml.cc.
std::string to_string(GGMLType t);
19458

195-
} // namespace hardware
59+
} // namespace hardware

0 commit comments

Comments
 (0)