Skip to content

Commit 55deb32

Browse files
ebiggers authored and kdave committed
btrfs-progs: crypto: x86/crc32c - simplify code for handling fewer than 200 bytes
(Linux kernel patch 84ebf9dbe652355461b3e2f4693ce7b7402c30ca). The assembly code in crc32c-pcl-intel-asm_64.S is invoked only for lengths >= 512, due to the overhead of saving and restoring FPU state. Therefore, it is unnecessary for this code to be excessively "optimized" for lengths < 200. Eliminate the excessive unrolling of this part of the code and use a more straightforward qword-at-a-time loop. Note: the part of the code in question is not entirely redundant, as it is still used to process any remainder mod 24, as well as any remaining data when fewer than 200 bytes remain after at least one 3072-byte chunk. Signed-off-by: Eric Biggers <ebiggers@google.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> Signed-off-by: David Sterba <dsterba@suse.com>
1 parent 0f5b8a5 commit 55deb32

File tree

1 file changed

+33
-83
lines changed

1 file changed

+33
-83
lines changed

crypto/crc32c-pcl-intel-asm_64.S

Lines changed: 33 additions & 83 deletions
Original file line numberDiff line numberDiff line change
@@ -58,20 +58,10 @@
5858
.quad .Lcrc_\i
5959
.endm
6060

61-
.macro JNC_LESS_THAN j
62-
jnc .Lless_than_\j
63-
.endm
64-
65-
# Define threshold where buffers are considered "small" and routed to more
66-
# efficient "by-1" code. This "by-1" code only handles up to 255 bytes, so
67-
# SMALL_SIZE can be no larger than 255.
68-
61+
# Define threshold below which buffers are considered "small" and routed to
62+
# regular CRC code that does not interleave the CRC instructions.
6963
#define SMALL_SIZE 200
7064

71-
.if (SMALL_SIZE > 255)
72-
.error "SMALL_ SIZE must be < 256"
73-
.endif
74-
7565
# unsigned int crc_pcl(u8 *buffer, int len, unsigned int crc_init);
7666

7767
.text
@@ -105,25 +95,18 @@ crc_pcl:
10595
## Move crc_init for Linux to a different
10696
mov crc_init_arg, crc_init
10797

98+
mov %bufp, bufptmp # rdi = *buf
99+
cmp $SMALL_SIZE, len
100+
jb .Lsmall
101+
108102
################################################################
109103
## 1) ALIGN:
110104
################################################################
111-
112-
mov %bufp, bufptmp # rdi = *buf
113105
neg %bufp
114106
and $7, %bufp # calculate the unalignment amount of
115107
# the address
116108
je .Lproc_block # Skip if aligned
117109

118-
## If len is less than 8 and we're unaligned, we need to jump
119-
## to special code to avoid reading beyond the end of the buffer
120-
cmp $8, len
121-
jae .Ldo_align
122-
# less_than_8 expects length in upper 3 bits of len_dw
123-
# less_than_8_post_shl1 expects length = carryflag * 8 + len_dw[31:30]
124-
shl $32-3+1, len_dw
125-
jmp .Lless_than_8_post_shl1
126-
127110
.Ldo_align:
128111
#### Calculate CRC of unaligned bytes of the buffer (if any)
129112
movq (bufptmp), tmp # load a quadword from the buffer
@@ -149,9 +132,6 @@ crc_pcl:
149132
jae .Lfull_block
150133

151134
.Lcontinue_block:
152-
cmpq $SMALL_SIZE, len
153-
jb .Lsmall
154-
155135
## len < 128*24
156136
movq $2731, %rax # 2731 = ceil(2^16 / 24)
157137
mul len_dw
@@ -250,68 +230,38 @@ LABEL crc_ 0
250230
mov tmp, len
251231
cmp $128*24, tmp
252232
jae .Lfull_block
253-
cmp $24, tmp
233+
cmp $SMALL_SIZE, tmp
254234
jae .Lcontinue_block
255235

256-
.Lless_than_24:
257-
shl $32-4, len_dw # less_than_16 expects length
258-
# in upper 4 bits of len_dw
259-
jnc .Lless_than_16
260-
crc32q (bufptmp), crc_init
261-
crc32q 8(bufptmp), crc_init
262-
jz .Ldo_return
263-
add $16, bufptmp
264-
# len is less than 8 if we got here
265-
# less_than_8 expects length in upper 3 bits of len_dw
266-
# less_than_8_post_shl1 expects length = carryflag * 8 + len_dw[31:30]
267-
shl $2, len_dw
268-
jmp .Lless_than_8_post_shl1
269-
270236
#######################################################################
271-
## 6) LESS THAN 256-bytes REMAIN AT THIS POINT (8-bits of len are full)
237+
## 6) Process any remainder without interleaving:
272238
#######################################################################
273239
.Lsmall:
274-
shl $32-8, len_dw # Prepare len_dw for less_than_256
275-
j=256
276-
.rept 5 # j = {256, 128, 64, 32, 16}
277-
.altmacro
278-
LABEL less_than_ %j # less_than_j: Length should be in
279-
# upper lg(j) bits of len_dw
280-
j=(j/2)
281-
shl $1, len_dw # Get next MSB
282-
JNC_LESS_THAN %j
283-
.noaltmacro
284-
i=0
285-
.rept (j/8)
286-
crc32q i(bufptmp), crc_init # Compute crc32 of 8-byte data
287-
i=i+8
288-
.endr
289-
jz .Ldo_return # Return if remaining length is zero
290-
add $j, bufptmp # Advance buf
291-
.endr
292-
293-
.Lless_than_8: # Length should be stored in
294-
# upper 3 bits of len_dw
295-
shl $1, len_dw
296-
.Lless_than_8_post_shl1:
297-
jnc .Lless_than_4
298-
crc32l (bufptmp), crc_init_dw # CRC of 4 bytes
299-
jz .Ldo_return # return if remaining data is zero
300-
add $4, bufptmp
301-
.Lless_than_4: # Length should be stored in
302-
# upper 2 bits of len_dw
303-
shl $1, len_dw
304-
jnc .Lless_than_2
305-
crc32w (bufptmp), crc_init_dw # CRC of 2 bytes
306-
jz .Ldo_return # return if remaining data is zero
307-
add $2, bufptmp
308-
.Lless_than_2: # Length should be stored in the MSB
309-
# of len_dw
310-
shl $1, len_dw
311-
jnc .Lless_than_1
312-
crc32b (bufptmp), crc_init_dw # CRC of 1 byte
313-
.Lless_than_1: # Length should be zero
314-
.Ldo_return:
240+
test len, len
241+
jz .Ldone
242+
mov len_dw, %eax
243+
shr $3, %eax
244+
jz .Ldo_dword
245+
.Ldo_qwords:
246+
crc32q (bufptmp), crc_init
247+
add $8, bufptmp
248+
dec %eax
249+
jnz .Ldo_qwords
250+
.Ldo_dword:
251+
test $4, len_dw
252+
jz .Ldo_word
253+
crc32l (bufptmp), crc_init_dw
254+
add $4, bufptmp
255+
.Ldo_word:
256+
test $2, len_dw
257+
jz .Ldo_byte
258+
crc32w (bufptmp), crc_init_dw
259+
add $2, bufptmp
260+
.Ldo_byte:
261+
test $1, len_dw
262+
jz .Ldone
263+
crc32b (bufptmp), crc_init_dw
264+
.Ldone:
315265
movq crc_init, %rax
316266
popq %rsi
317267
popq %rdi

0 commit comments

Comments
 (0)