Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 2 additions & 3 deletions dev/aarch64_clean/meta.h
Original file line number Diff line number Diff line change
Expand Up @@ -210,10 +210,9 @@ static MLD_INLINE int mld_polyz_unpack_19_native(int32_t *r, const uint8_t *buf)
defined(MLD_CONFIG_REDUCE_RAM) || defined(MLD_UNIT_TEST)
MLD_MUST_CHECK_RETURN_VALUE
static MLD_INLINE int mld_poly_pointwise_montgomery_native(
int32_t out[MLDSA_N], const int32_t in0[MLDSA_N],
const int32_t in1[MLDSA_N])
int32_t a[MLDSA_N], const int32_t b[MLDSA_N])
{
mld_poly_pointwise_montgomery_asm(out, in0, in1);
mld_poly_pointwise_montgomery_asm(a, b);
return MLD_NATIVE_FUNC_SUCCESS;
}
#endif /* !MLD_CONFIG_NO_SIGN_API || !MLD_CONFIG_NO_VERIFY_API || \
Expand Down
8 changes: 3 additions & 5 deletions dev/aarch64_clean/src/arith_native_aarch64.h
Original file line number Diff line number Diff line change
Expand Up @@ -150,19 +150,17 @@ void mld_polyz_unpack_19_asm(int32_t *r, const uint8_t *buf,
defined(MLD_CONFIG_REDUCE_RAM) || defined(MLD_UNIT_TEST)
#define mld_poly_pointwise_montgomery_asm \
MLD_NAMESPACE(poly_pointwise_montgomery_asm)
void mld_poly_pointwise_montgomery_asm(int32_t *r, const int32_t *a,
const int32_t *b)
void mld_poly_pointwise_montgomery_asm(int32_t *a, const int32_t *b)
/* This must be kept in sync with the HOL-Light specification
* in proofs/hol_light/aarch64/proofs/mldsa_pointwise.ml */
__contract__(
requires(memory_no_alias(r, sizeof(int32_t) * MLDSA_N))
requires(memory_no_alias(a, sizeof(int32_t) * MLDSA_N))
requires(memory_no_alias(b, sizeof(int32_t) * MLDSA_N))
/* check-magic: off */
requires(array_abs_bound(a, 0, MLDSA_N, 75423753))
requires(array_abs_bound(b, 0, MLDSA_N, 75423753))
assigns(memory_slice(r, sizeof(int32_t) * MLDSA_N))
ensures(array_abs_bound(r, 0, MLDSA_N, 8380417))
assigns(memory_slice(a, sizeof(int32_t) * MLDSA_N))
ensures(array_abs_bound(a, 0, MLDSA_N, 8380417))
/* check-magic: on */
);
#endif /* !MLD_CONFIG_NO_SIGN_API || !MLD_CONFIG_NO_VERIFY_API || \
Expand Down
16 changes: 7 additions & 9 deletions dev/aarch64_clean/src/pointwise_montgomery.S
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,8 @@
smull2 \dh\().2d, \a\().4s, \b\().4s
.endm

out_ptr .req x0
a_ptr .req x1
b_ptr .req x2
a_ptr .req x0
b_ptr .req x1

count .req x3
wtmp .req w3
Expand Down Expand Up @@ -89,10 +88,10 @@ MLD_ASM_FN_SYMBOL(poly_pointwise_montgomery_asm)
mov count, #(MLDSA_N / 4)

poly_pointwise_montgomery_loop_start:
ldr q_a_0, [a_ptr, #0*16]
ldr q_a_1, [a_ptr, #1*16]
ldr q_a_2, [a_ptr, #2*16]
ldr q_a_3, [a_ptr, #3*16]
ldr q_a_0, [a_ptr], #4*16

ldr q_b_1, [b_ptr, #1*16]
ldr q_b_2, [b_ptr, #2*16]
Expand All @@ -116,17 +115,16 @@ poly_pointwise_montgomery_loop_start:
//
// See description of mld_montgomery_reduce() in mldsa/src/reduce.h.

str q_c_1, [out_ptr, #1*16]
str q_c_2, [out_ptr, #2*16]
str q_c_3, [out_ptr, #3*16]
str q_c_0, [out_ptr], #4*16
str q_c_1, [a_ptr, #1*16]
str q_c_2, [a_ptr, #2*16]
str q_c_3, [a_ptr, #3*16]
str q_c_0, [a_ptr], #4*16

subs count, count, #4
cbnz count, poly_pointwise_montgomery_loop_start

ret

.unreq out_ptr
.unreq a_ptr
.unreq b_ptr
.unreq count
Expand Down
5 changes: 2 additions & 3 deletions dev/aarch64_opt/meta.h
Original file line number Diff line number Diff line change
Expand Up @@ -210,10 +210,9 @@ static MLD_INLINE int mld_polyz_unpack_19_native(int32_t *r, const uint8_t *buf)
defined(MLD_CONFIG_REDUCE_RAM) || defined(MLD_UNIT_TEST)
MLD_MUST_CHECK_RETURN_VALUE
static MLD_INLINE int mld_poly_pointwise_montgomery_native(
int32_t out[MLDSA_N], const int32_t in0[MLDSA_N],
const int32_t in1[MLDSA_N])
int32_t a[MLDSA_N], const int32_t b[MLDSA_N])
{
mld_poly_pointwise_montgomery_asm(out, in0, in1);
mld_poly_pointwise_montgomery_asm(a, b);
return MLD_NATIVE_FUNC_SUCCESS;
}
#endif /* !MLD_CONFIG_NO_SIGN_API || !MLD_CONFIG_NO_VERIFY_API || \
Expand Down
8 changes: 3 additions & 5 deletions dev/aarch64_opt/src/arith_native_aarch64.h
Original file line number Diff line number Diff line change
Expand Up @@ -150,19 +150,17 @@ void mld_polyz_unpack_19_asm(int32_t *r, const uint8_t *buf,
defined(MLD_CONFIG_REDUCE_RAM) || defined(MLD_UNIT_TEST)
#define mld_poly_pointwise_montgomery_asm \
MLD_NAMESPACE(poly_pointwise_montgomery_asm)
void mld_poly_pointwise_montgomery_asm(int32_t *r, const int32_t *a,
const int32_t *b)
void mld_poly_pointwise_montgomery_asm(int32_t *a, const int32_t *b)
/* This must be kept in sync with the HOL-Light specification
* in proofs/hol_light/aarch64/proofs/mldsa_pointwise.ml */
__contract__(
requires(memory_no_alias(r, sizeof(int32_t) * MLDSA_N))
requires(memory_no_alias(a, sizeof(int32_t) * MLDSA_N))
requires(memory_no_alias(b, sizeof(int32_t) * MLDSA_N))
/* check-magic: off */
requires(array_abs_bound(a, 0, MLDSA_N, 75423753))
requires(array_abs_bound(b, 0, MLDSA_N, 75423753))
assigns(memory_slice(r, sizeof(int32_t) * MLDSA_N))
ensures(array_abs_bound(r, 0, MLDSA_N, 8380417))
assigns(memory_slice(a, sizeof(int32_t) * MLDSA_N))
ensures(array_abs_bound(a, 0, MLDSA_N, 8380417))
/* check-magic: on */
);
#endif /* !MLD_CONFIG_NO_SIGN_API || !MLD_CONFIG_NO_VERIFY_API || \
Expand Down
16 changes: 7 additions & 9 deletions dev/aarch64_opt/src/pointwise_montgomery.S
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,8 @@
smull2 \dh\().2d, \a\().4s, \b\().4s
.endm

out_ptr .req x0
a_ptr .req x1
b_ptr .req x2
a_ptr .req x0
b_ptr .req x1

count .req x3
wtmp .req w3
Expand Down Expand Up @@ -89,10 +88,10 @@ MLD_ASM_FN_SYMBOL(poly_pointwise_montgomery_asm)
mov count, #(MLDSA_N / 4)

poly_pointwise_montgomery_loop_start:
ldr q_a_0, [a_ptr, #0*16]
ldr q_a_1, [a_ptr, #1*16]
ldr q_a_2, [a_ptr, #2*16]
ldr q_a_3, [a_ptr, #3*16]
ldr q_a_0, [a_ptr], #4*16

ldr q_b_1, [b_ptr, #1*16]
ldr q_b_2, [b_ptr, #2*16]
Expand All @@ -116,17 +115,16 @@ poly_pointwise_montgomery_loop_start:
//
// See description of mld_montgomery_reduce() in mldsa/src/reduce.h.

str q_c_1, [out_ptr, #1*16]
str q_c_2, [out_ptr, #2*16]
str q_c_3, [out_ptr, #3*16]
str q_c_0, [out_ptr], #4*16
str q_c_1, [a_ptr, #1*16]
str q_c_2, [a_ptr, #2*16]
str q_c_3, [a_ptr, #3*16]
str q_c_0, [a_ptr], #4*16

subs count, count, #4
cbnz count, poly_pointwise_montgomery_loop_start

ret

.unreq out_ptr
.unreq a_ptr
.unreq b_ptr
.unreq count
Expand Down
4 changes: 2 additions & 2 deletions dev/x86_64/meta.h
Original file line number Diff line number Diff line change
Expand Up @@ -263,13 +263,13 @@ static MLD_INLINE int mld_polyz_unpack_19_native(int32_t *r, const uint8_t *a)
defined(MLD_CONFIG_REDUCE_RAM) || defined(MLD_UNIT_TEST)
MLD_MUST_CHECK_RETURN_VALUE
static MLD_INLINE int mld_poly_pointwise_montgomery_native(
int32_t c[MLDSA_N], const int32_t a[MLDSA_N], const int32_t b[MLDSA_N])
int32_t a[MLDSA_N], const int32_t b[MLDSA_N])
{
if (!mld_sys_check_capability(MLD_SYS_CAP_AVX2))
{
return MLD_NATIVE_FUNC_FALLBACK;
}
mld_pointwise_avx2(c, a, b, mld_qdata);
mld_pointwise_avx2(a, b, mld_qdata);
return MLD_NATIVE_FUNC_SUCCESS;
}
#endif /* !MLD_CONFIG_NO_SIGN_API || !MLD_CONFIG_NO_VERIFY_API || \
Expand Down
8 changes: 3 additions & 5 deletions dev/x86_64/src/arith_native_x86_64.h
Original file line number Diff line number Diff line change
Expand Up @@ -122,20 +122,18 @@ void mld_polyz_unpack_19_avx2(int32_t *r, const uint8_t *a);
#endif /* !MLD_CONFIG_NO_SIGN_API || !MLD_CONFIG_NO_VERIFY_API */

#define mld_pointwise_avx2 MLD_NAMESPACE(pointwise_avx2)
void mld_pointwise_avx2(int32_t *c, const int32_t *a, const int32_t *b,
const int32_t *qdata)
void mld_pointwise_avx2(int32_t *a, const int32_t *b, const int32_t *qdata)
/* This must be kept in sync with the HOL-Light specification
* in proofs/hol_light/x86_64/proofs/mldsa_pointwise.ml */
__contract__(
requires(memory_no_alias(c, sizeof(int32_t) * MLDSA_N))
requires(memory_no_alias(a, sizeof(int32_t) * MLDSA_N))
requires(memory_no_alias(b, sizeof(int32_t) * MLDSA_N))
/* check-magic: off */
requires(array_abs_bound(a, 0, MLDSA_N, 75423753))
requires(array_abs_bound(b, 0, MLDSA_N, 75423753))
requires(qdata == mld_qdata)
assigns(memory_slice(c, sizeof(int32_t) * MLDSA_N))
ensures(array_abs_bound(c, 0, MLDSA_N, 8380417))
assigns(memory_slice(a, sizeof(int32_t) * MLDSA_N))
ensures(array_abs_bound(a, 0, MLDSA_N, 8380417))
/* check-magic: on */
);

Expand Down
37 changes: 18 additions & 19 deletions dev/x86_64/src/pointwise.S
Original file line number Diff line number Diff line change
Expand Up @@ -28,35 +28,35 @@
.text

/*
* void mld_pointwise_avx2(__m256i *c, const __m256i *a, const __m256i *b, const __m256i *qdata)
* void mld_pointwise_avx2(__m256i *a, const __m256i *b, const __m256i *qdata)
*
* Pointwise multiplication of polynomials in NTT domain with Montgomery reduction
* Pointwise multiplication of polynomials in NTT domain with Montgomery
* reduction. Destructive in the first argument: a := a * b * R^{-1} mod q.
*
* Arguments:
* rdi: pointer to output polynomial c
* rsi: pointer to input polynomial a
* rdx: pointer to input polynomial b
* rcx: pointer to qdata constants
* rdi: pointer to first input/output polynomial a
* rsi: pointer to second input polynomial b
* rdx: pointer to qdata constants
*/
.balign 4
.global MLD_ASM_NAMESPACE(pointwise_avx2)
MLD_ASM_FN_SYMBOL(pointwise_avx2)

// Load constants
vmovdqa ymm0, [rcx + (MLD_AVX2_BACKEND_DATA_OFFSET_8XQINV)*4]
vmovdqa ymm1, [rcx + (MLD_AVX2_BACKEND_DATA_OFFSET_8XQ)*4]
vmovdqa ymm0, [rdx + (MLD_AVX2_BACKEND_DATA_OFFSET_8XQINV)*4]
vmovdqa ymm1, [rdx + (MLD_AVX2_BACKEND_DATA_OFFSET_8XQ)*4]

xor eax, eax
pointwise_avx2_looptop1:
// Handle 24 = 3*8 coefficients per iteration

// Load
vmovdqa ymm2, [rsi]
vmovdqa ymm4, [rsi + 32]
vmovdqa ymm6, [rsi + 64]
vmovdqa ymm10, [rdx]
vmovdqa ymm12, [rdx + 32]
vmovdqa ymm14, [rdx + 64]
vmovdqa ymm2, [rdi]
vmovdqa ymm4, [rdi + 32]
vmovdqa ymm6, [rdi + 64]
vmovdqa ymm10, [rsi]
vmovdqa ymm12, [rsi + 32]
vmovdqa ymm14, [rsi + 64]
vpsrlq ymm3, ymm2, 32
vpsrlq ymm5, ymm4, 32
vmovshdup ymm7, ymm6
Expand Down Expand Up @@ -122,7 +122,6 @@ pointwise_avx2_looptop1:

add rdi, 96
add rsi, 96
add rdx, 96
add eax, 1
cmp eax, 10
jb pointwise_avx2_looptop1
Expand All @@ -131,10 +130,10 @@ pointwise_avx2_looptop1:
// Handle the last 256 % 24 = 16 = 2*8 coefficients, left over by the loop

// Load
vmovdqa ymm2, [rsi]
vmovdqa ymm4, [rsi + 32]
vmovdqa ymm10, [rdx]
vmovdqa ymm12, [rdx + 32]
vmovdqa ymm2, [rdi]
vmovdqa ymm4, [rdi + 32]
vmovdqa ymm10, [rsi]
vmovdqa ymm12, [rsi + 32]
vpsrlq ymm3, ymm2, 32
vpsrlq ymm5, ymm4, 32
vmovshdup ymm11, ymm10
Expand Down
18 changes: 9 additions & 9 deletions integration/opentitan/reduce_alloc.patch
Original file line number Diff line number Diff line change
Expand Up @@ -11,22 +11,22 @@ index be11f20..0000000 100644
- kOtcryptoMldsa44WorkBufferKeypairWords = 32992 / sizeof(uint32_t),
- kOtcryptoMldsa44WorkBufferSignWords = 32448 / sizeof(uint32_t),
- kOtcryptoMldsa44WorkBufferVerifyWords = 22464 / sizeof(uint32_t),
+ kOtcryptoMldsa44WorkBufferKeypairWords = 14624 / sizeof(uint32_t),
+ kOtcryptoMldsa44WorkBufferSignWords = 14144 / sizeof(uint32_t),
+ kOtcryptoMldsa44WorkBufferVerifyWords = 20416 / sizeof(uint32_t),
+ kOtcryptoMldsa44WorkBufferKeypairWords = 13600 / sizeof(uint32_t),
+ kOtcryptoMldsa44WorkBufferSignWords = 13120 / sizeof(uint32_t),
+ kOtcryptoMldsa44WorkBufferVerifyWords = 19392 / sizeof(uint32_t),

- kOtcryptoMldsa65WorkBufferKeypairWords = 46304 / sizeof(uint32_t),
- kOtcryptoMldsa65WorkBufferSignWords = 44768 / sizeof(uint32_t),
- kOtcryptoMldsa65WorkBufferVerifyWords = 30720 / sizeof(uint32_t),
+ kOtcryptoMldsa65WorkBufferKeypairWords = 20768 / sizeof(uint32_t),
+ kOtcryptoMldsa65WorkBufferSignWords = 18272 / sizeof(uint32_t),
+ kOtcryptoMldsa65WorkBufferVerifyWords = 27648 / sizeof(uint32_t),
+ kOtcryptoMldsa65WorkBufferKeypairWords = 19744 / sizeof(uint32_t),
+ kOtcryptoMldsa65WorkBufferSignWords = 17248 / sizeof(uint32_t),
+ kOtcryptoMldsa65WorkBufferVerifyWords = 26624 / sizeof(uint32_t),

- kOtcryptoMldsa87WorkBufferKeypairWords = 62688 / sizeof(uint32_t),
- kOtcryptoMldsa87WorkBufferSignWords = 59104 / sizeof(uint32_t),
- kOtcryptoMldsa87WorkBufferVerifyWords = 41216 / sizeof(uint32_t),
+ kOtcryptoMldsa87WorkBufferKeypairWords = 26912 / sizeof(uint32_t),
+ kOtcryptoMldsa87WorkBufferSignWords = 22368 / sizeof(uint32_t),
+ kOtcryptoMldsa87WorkBufferVerifyWords = 36096 / sizeof(uint32_t),
+ kOtcryptoMldsa87WorkBufferKeypairWords = 25888 / sizeof(uint32_t),
+ kOtcryptoMldsa87WorkBufferSignWords = 21344 / sizeof(uint32_t),
+ kOtcryptoMldsa87WorkBufferVerifyWords = 35072 / sizeof(uint32_t),
};

30 changes: 15 additions & 15 deletions mldsa/mldsa_native.h
Original file line number Diff line number Diff line change
Expand Up @@ -954,21 +954,21 @@ int MLD_API_NAMESPACE(pk_from_sk)(
#define MLD_TOTAL_ALLOC_87_SIGN 108224
#define MLD_TOTAL_ALLOC_87_VERIFY 91360
#else /* MLD_API_LEGACY_CONFIG || !MLD_CONFIG_REDUCE_RAM */
#define MLD_TOTAL_ALLOC_44_KEYPAIR_NO_PCT 14624
#define MLD_TOTAL_ALLOC_44_KEYPAIR_PCT 24160
#define MLD_TOTAL_ALLOC_44_PK_FROM_SK 22752
#define MLD_TOTAL_ALLOC_44_SIGN 14144
#define MLD_TOTAL_ALLOC_44_VERIFY 20416
#define MLD_TOTAL_ALLOC_65_KEYPAIR_NO_PCT 20768
#define MLD_TOTAL_ALLOC_65_KEYPAIR_PCT 32928
#define MLD_TOTAL_ALLOC_65_PK_FROM_SK 31968
#define MLD_TOTAL_ALLOC_65_SIGN 18272
#define MLD_TOTAL_ALLOC_65_VERIFY 27648
#define MLD_TOTAL_ALLOC_87_KEYPAIR_NO_PCT 26912
#define MLD_TOTAL_ALLOC_87_KEYPAIR_PCT 43328
#define MLD_TOTAL_ALLOC_87_PK_FROM_SK 42208
#define MLD_TOTAL_ALLOC_87_SIGN 22368
#define MLD_TOTAL_ALLOC_87_VERIFY 36096
#define MLD_TOTAL_ALLOC_44_KEYPAIR_NO_PCT 13600
#define MLD_TOTAL_ALLOC_44_KEYPAIR_PCT 23136
#define MLD_TOTAL_ALLOC_44_PK_FROM_SK 21728
#define MLD_TOTAL_ALLOC_44_SIGN 13120
#define MLD_TOTAL_ALLOC_44_VERIFY 19392
#define MLD_TOTAL_ALLOC_65_KEYPAIR_NO_PCT 19744
#define MLD_TOTAL_ALLOC_65_KEYPAIR_PCT 31904
#define MLD_TOTAL_ALLOC_65_PK_FROM_SK 30944
#define MLD_TOTAL_ALLOC_65_SIGN 17248
#define MLD_TOTAL_ALLOC_65_VERIFY 26624
#define MLD_TOTAL_ALLOC_87_KEYPAIR_NO_PCT 25888
#define MLD_TOTAL_ALLOC_87_KEYPAIR_PCT 42304
#define MLD_TOTAL_ALLOC_87_PK_FROM_SK 41184
#define MLD_TOTAL_ALLOC_87_SIGN 21344
#define MLD_TOTAL_ALLOC_87_VERIFY 35072
#endif /* !(MLD_API_LEGACY_CONFIG || !MLD_CONFIG_REDUCE_RAM) */
/* check-magic: on */

Expand Down
5 changes: 2 additions & 3 deletions mldsa/src/native/aarch64/meta.h
Original file line number Diff line number Diff line change
Expand Up @@ -210,10 +210,9 @@ static MLD_INLINE int mld_polyz_unpack_19_native(int32_t *r, const uint8_t *buf)
defined(MLD_CONFIG_REDUCE_RAM) || defined(MLD_UNIT_TEST)
MLD_MUST_CHECK_RETURN_VALUE
static MLD_INLINE int mld_poly_pointwise_montgomery_native(
int32_t out[MLDSA_N], const int32_t in0[MLDSA_N],
const int32_t in1[MLDSA_N])
int32_t a[MLDSA_N], const int32_t b[MLDSA_N])
{
mld_poly_pointwise_montgomery_asm(out, in0, in1);
mld_poly_pointwise_montgomery_asm(a, b);
return MLD_NATIVE_FUNC_SUCCESS;
}
#endif /* !MLD_CONFIG_NO_SIGN_API || !MLD_CONFIG_NO_VERIFY_API || \
Expand Down
8 changes: 3 additions & 5 deletions mldsa/src/native/aarch64/src/arith_native_aarch64.h
Original file line number Diff line number Diff line change
Expand Up @@ -150,19 +150,17 @@ void mld_polyz_unpack_19_asm(int32_t *r, const uint8_t *buf,
defined(MLD_CONFIG_REDUCE_RAM) || defined(MLD_UNIT_TEST)
#define mld_poly_pointwise_montgomery_asm \
MLD_NAMESPACE(poly_pointwise_montgomery_asm)
void mld_poly_pointwise_montgomery_asm(int32_t *r, const int32_t *a,
const int32_t *b)
void mld_poly_pointwise_montgomery_asm(int32_t *a, const int32_t *b)
/* This must be kept in sync with the HOL-Light specification
* in proofs/hol_light/aarch64/proofs/mldsa_pointwise.ml */
__contract__(
requires(memory_no_alias(r, sizeof(int32_t) * MLDSA_N))
requires(memory_no_alias(a, sizeof(int32_t) * MLDSA_N))
requires(memory_no_alias(b, sizeof(int32_t) * MLDSA_N))
/* check-magic: off */
requires(array_abs_bound(a, 0, MLDSA_N, 75423753))
requires(array_abs_bound(b, 0, MLDSA_N, 75423753))
assigns(memory_slice(r, sizeof(int32_t) * MLDSA_N))
ensures(array_abs_bound(r, 0, MLDSA_N, 8380417))
assigns(memory_slice(a, sizeof(int32_t) * MLDSA_N))
ensures(array_abs_bound(a, 0, MLDSA_N, 8380417))
/* check-magic: on */
);
#endif /* !MLD_CONFIG_NO_SIGN_API || !MLD_CONFIG_NO_VERIFY_API || \
Expand Down
Loading
Loading