Skip to content

Commit 5160443

Browse files
ixgbe and ggerganov authored
ggml-cpu : fix RISC-V Q4_0 repack select and RVV feature reporting (#17951)
* ggml-cpu:fix RISC-V Q4_0 repack select and RVV feature reporting

  Signed-off-by: Wang Yang <yangwang@iscas.ac.cn>

* using the name VLEN instead of CNT

* Update ggml/include/ggml-cpu.h

---------

Signed-off-by: Wang Yang <yangwang@iscas.ac.cn>
Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
1 parent 1715896 commit 5160443

File tree

4 files changed

+33
-1
lines changed

4 files changed

+33
-1
lines changed

ggml/include/ggml-cpu.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,7 @@ extern "C" {
9999
GGML_BACKEND_API int ggml_cpu_has_sme (void);
100100
// other
101101
GGML_BACKEND_API int ggml_cpu_has_riscv_v (void);
102+
GGML_BACKEND_API int ggml_cpu_get_rvv_vlen (void); // risc-v vector length in bytes
102103
GGML_BACKEND_API int ggml_cpu_has_vsx (void);
103104
GGML_BACKEND_API int ggml_cpu_has_vxe (void);
104105
GGML_BACKEND_API int ggml_cpu_has_wasm_simd (void);

ggml/src/ggml-cpu/ggml-cpu.c

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,11 @@ struct ggml_arm_arch_features_type {
8181
} ggml_arm_arch_features = { 0 };
8282
#endif
8383

84+
#if defined(__riscv)
85+
// Global record of RISC-V CPU feature state; only compiled for RISC-V targets and zero-initialized here.
struct ggml_riscv_arch_features_type {
86+
int rvv_vlen; // RVV vector length in bytes; written by ggml_init_riscv_arch_features(), 0 if never written
87+
} ggml_riscv_arch_features = { 0 };
88+
#endif
8489

8590
#if defined(_WIN32)
8691

@@ -703,6 +708,15 @@ static void ggml_init_arm_arch_features(void) {}
703708
#endif
704709
#endif // __ARM_ARCH
705710

711+
#if defined(__riscv) && defined(__riscv_v_intrinsic)
712+
#include <riscv_vector.h>
713+
// Stores the value returned by __riscv_vlenb() into ggml_riscv_arch_features.rvv_vlen.
static void ggml_init_riscv_arch_features(void) {
714+
ggml_riscv_arch_features.rvv_vlen = __riscv_vlenb();
715+
}
716+
#else
717+
// Built without RISC-V vector intrinsics: nothing to query, so this is intentionally empty.
static void ggml_init_riscv_arch_features(void) {}
718+
#endif
719+
706720
struct ggml_tensor * ggml_new_i32(struct ggml_context * ctx, int32_t value) {
707721
GGML_ASSERT(!ggml_get_no_alloc(ctx));
708722

@@ -3459,6 +3473,14 @@ int ggml_cpu_has_riscv_v(void) {
34593473
#endif
34603474
}
34613475

3476+
// Returns the RVV vector length in bytes as stored in ggml_riscv_arch_features.rvv_vlen,
// or 0 when compiled without RISC-V vector intrinsics.
// The stored field is zero-initialized and only assigned by ggml_init_riscv_arch_features(),
// so this also yields 0 if that initializer has not run yet.
int ggml_cpu_get_rvv_vlen(void) {
3477+
#if defined(__riscv) && defined(__riscv_v_intrinsic)
3478+
return ggml_riscv_arch_features.rvv_vlen;
3479+
#else
3480+
return 0;
3481+
#endif
3482+
}
3483+
34623484
int ggml_cpu_has_f16c(void) {
34633485
#if defined(__F16C__)
34643486
return 1;
@@ -3625,6 +3647,10 @@ void ggml_cpu_init(void) {
36253647
ggml_init_arm_arch_features();
36263648
#endif
36273649

3650+
#if defined(__riscv)
3651+
ggml_init_riscv_arch_features();
3652+
#endif
3653+
36283654
is_first_call = false;
36293655
}
36303656

ggml/src/ggml-cpu/ggml-cpu.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -583,6 +583,10 @@ static ggml_backend_feature * ggml_backend_cpu_get_features(ggml_backend_reg_t r
583583
if (ggml_cpu_has_riscv_v()) {
584584
features.push_back({ "RISCV_V", "1" });
585585
}
586+
if (ggml_cpu_get_rvv_vlen() > 0) {
587+
static std::string rvv_vlen = std::to_string(ggml_cpu_get_rvv_vlen());
588+
features.push_back({ "RVV_VLEN", rvv_vlen.c_str() });
589+
}
586590
if (ggml_cpu_has_vsx()) {
587591
features.push_back({ "VSX", "1" });
588592
}

ggml/src/ggml-cpu/repack.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2169,7 +2169,8 @@ static const ggml::cpu::tensor_traits * ggml_repack_get_optimal_repack_type(cons
21692169
static const ggml::cpu::repack::tensor_traits<block_iq4_nl, 8, 8, GGML_TYPE_Q8_0> iq4_nl_8x8_q8_0;
21702170

21712171
if (cur->type == GGML_TYPE_Q4_0) {
2172-
if (ggml_cpu_has_avx2() || (ggml_cpu_has_sve() && ggml_cpu_has_matmul_int8() && ggml_cpu_get_sve_cnt() == QK8_0)) {
2172+
if (ggml_cpu_has_avx2() || (ggml_cpu_has_sve() && ggml_cpu_has_matmul_int8() && ggml_cpu_get_sve_cnt() == QK8_0)
2173+
|| (ggml_cpu_has_riscv_v() && (ggml_cpu_get_rvv_vlen() >= QK4_0))) {
21732174
if (cur->ne[1] % 8 == 0) {
21742175
return &q4_0_8x8_q8_0;
21752176
}

0 commit comments

Comments
 (0)