5858.quad .Lcrc_\i
5959.endm
6060
61- .macro JNC_LESS_THAN j
62- jnc .Lless_than_\j
63- .endm
64-
65- # Define threshold where buffers are considered "small" and routed to more
66- # efficient "by-1" code. This "by-1" code only handles up to 255 bytes, so
67- # SMALL_SIZE can be no larger than 255.
68-
61+ # Define threshold below which buffers are considered "small" and routed to
62+ # regular CRC code that does not interleave the CRC instructions.
6963#define SMALL_SIZE 200
7064
71- .if (SMALL_SIZE > 255 )
72- .error "SMALL_ SIZE must be < 256"
73- .endif
74-
7565# unsigned int crc_pcl(u8 *buffer, int len, unsigned int crc_init);
7666
7767.text
@@ -105,25 +95,18 @@ crc_pcl:
10595 ## Move crc_init for Linux to a different register
10696 mov crc_init_arg, crc_init
10797
98+ mov %bufp, bufptmp # rdi = *buf
99+ cmp $SMALL_SIZE, len # short buffer? (unsigned compare against the threshold)
100+ jb .Lsmall # yes: skip the alignment step and go straight to .Lsmall
101+
108102 ################################################################
109103 ## 1) ALIGN:
110104 ################################################################
111-
112- mov %bufp, bufptmp # rdi = *buf
113105 neg %bufp
114106 and $7 , %bufp # calculate the unalignment amount of
115107 # the address
116108 je .Lproc_block # Skip if aligned
117109
118- ## If len is less than 8 and we're unaligned, we need to jump
119- ## to special code to avoid reading beyond the end of the buffer
120- cmp $8 , len
121- jae .Ldo_align
122- # less_than_8 expects length in upper 3 bits of len_dw
123- # less_than_8_post_shl1 expects length = carryflag * 8 + len_dw[31:30]
124- shl $32 -3 +1 , len_dw
125- jmp .Lless_than_8_post_shl1
126-
127110.Ldo_align:
128111 #### Calculate CRC of unaligned bytes of the buffer (if any)
129112 movq (bufptmp), tmp # load a quadword from the buffer
@@ -149,9 +132,6 @@ crc_pcl:
149132 jae .Lfull_block
150133
151134.Lcontinue_block:
152- cmpq $SMALL_SIZE, len
153- jb .Lsmall
154-
155135 ## len < 128*24
156136 movq $2731 , %rax # 2731 = ceil(2^16 / 24)
157137 mul len_dw
@@ -250,68 +230,38 @@ LABEL crc_ 0
250230 mov tmp, len
251231 cmp $128*24 , tmp
252232 jae .Lfull_block
253- cmp $24 , tmp
233+ cmp $SMALL_SIZE , tmp
254234 jae .Lcontinue_block
255235
256- .Lless_than_24:
257- shl $32 -4 , len_dw # less_than_16 expects length
258- # in upper 4 bits of len_dw
259- jnc .Lless_than_16
260- crc32q (bufptmp), crc_init
261- crc32q 8 (bufptmp), crc_init
262- jz .Ldo_return
263- add $16 , bufptmp
264- # len is less than 8 if we got here
265- # less_than_8 expects length in upper 3 bits of len_dw
266- # less_than_8_post_shl1 expects length = carryflag * 8 + len_dw[31:30]
267- shl $2 , len_dw
268- jmp .Lless_than_8_post_shl1
269-
270236 #######################################################################
271- ## 6) LESS THAN 256-bytes REMAIN AT THIS POINT (8-bits of len are full)
237+ ## 6) Process any remainder without interleaving:
272238 #######################################################################
273239.Lsmall:
274- shl $32 -8 , len_dw # Prepare len_dw for less_than_256
275- j =256
276- .rept 5 # j = {256, 128, 64, 32, 16}
277- .altmacro
278- LABEL less_than_ %j # less_than_j: Length should be in
279- # upper lg(j) bits of len_dw
280- j = (j/2 )
281- shl $1 , len_dw # Get next MSB
282- JNC_LESS_THAN %j
283- .noaltmacro
284- i =0
285- .rept (j/8 )
286- crc32q i(bufptmp), crc_init # Compute crc32 of 8-byte data
287- i = i+8
288- .endr
289- jz .Ldo_return # Return if remaining length is zero
290- add $j, bufptmp # Advance buf
291- .endr
292-
293- .Lless_than_8: # Length should be stored in
294- # upper 3 bits of len_dw
295- shl $1 , len_dw
296- .Lless_than_8_post_shl1:
297- jnc .Lless_than_4
298- crc32l (bufptmp), crc_init_dw # CRC of 4 bytes
299- jz .Ldo_return # return if remaining data is zero
300- add $4 , bufptmp
301- .Lless_than_4: # Length should be stored in
302- # upper 2 bits of len_dw
303- shl $1 , len_dw
304- jnc .Lless_than_2
305- crc32w (bufptmp), crc_init_dw # CRC of 2 bytes
306- jz .Ldo_return # return if remaining data is zero
307- add $2 , bufptmp
308- .Lless_than_2: # Length should be stored in the MSB
309- # of len_dw
310- shl $1 , len_dw
311- jnc .Lless_than_1
312- crc32b (bufptmp), crc_init_dw # CRC of 1 byte
313- .Lless_than_1: # Length should be zero
314- .Ldo_return:
240+ test len, len # zero bytes left?
241+ jz .Ldone # then the CRC is already final
242+ mov len_dw, %eax # eax = remaining length (len_dw presumably the 32-bit view of len; confirm)
243+ shr $3 , %eax # eax = number of whole 8-byte chunks
244+ jz .Ldo_dword # fewer than 8 bytes: skip the quadword loop
245+ .Ldo_qwords: # loop: one crc32q per 8-byte chunk, eax counts down to zero
246+ crc32q (bufptmp), crc_init # fold 8 bytes at bufptmp into the running CRC
247+ add $8 , bufptmp # advance past the bytes just consumed
248+ dec %eax # one fewer chunk to go; sets ZF when the count hits zero
249+ jnz .Ldo_qwords
250+ .Ldo_dword: # tail: the low three bits of the length select 4/2/1-byte steps
251+ test $4 , len_dw # bit 2 set => a 4-byte piece remains
252+ jz .Ldo_word
253+ crc32l (bufptmp), crc_init_dw # fold 4 bytes into the CRC
254+ add $4 , bufptmp
255+ .Ldo_word:
256+ test $2 , len_dw # bit 1 set => a 2-byte piece remains
257+ jz .Ldo_byte
258+ crc32w (bufptmp), crc_init_dw # fold 2 bytes into the CRC
259+ add $2 , bufptmp
260+ .Ldo_byte:
261+ test $1 , len_dw # bit 0 set => one final byte remains
262+ jz .Ldone
263+ crc32b (bufptmp), crc_init_dw # fold the last byte; no pointer advance needed afterwards
264+ .Ldone: # common exit: the code that follows copies crc_init to %rax
315265 movq crc_init, %rax
316266 popq %rsi
317267 popq %rdi
0 commit comments