Skip to content

Commit f2c0b90

Browse files
committed
Vectorise
1 parent 54fdb85 commit f2c0b90

File tree

1 file changed

+151
-14
lines changed

1 file changed

+151
-14
lines changed

src/lib.rs

Lines changed: 151 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -536,38 +536,151 @@ pub fn decode_alternative<T: AsRef<[u8]>>(input: T) -> Result<u128, DecodeError>
536536

537537
// Common encoding function
538538
unsafe fn encode_impl(num: u128, digits: usize, buf: &mut [u8], encode_table: &[u8; 62]) -> usize {
539-
if let Ok(num) = num.try_into() {
540-
return encode_impl_u64(num, digits, buf, encode_table);
539+
if let Ok(num) = TryInto::<u64>::try_into(num) {
540+
encode_impl_u64(num, digits, buf, encode_table)
541+
} else if digits > 20 {
542+
return encode_impl_over_20_digits(num, digits, buf, encode_table);
543+
} else if digits == 20 {
544+
// (AAAAAAAAAA, BBBBBBBBBB)
545+
let (first_u64, second_u64) = div_base_to_10(num);
546+
// AAAAAAAAAA
547+
let first_u64 = first_u64 as u64;
548+
549+
return encode_impl_20_digits(first_u64, second_u64, buf, encode_table);
550+
} else {
551+
// digits between 11 and 19 (exactly 20 is handled above; 10 or fewer digits always fit into a u64, which we checked first)
552+
return encode_impl_over_10_under_20_digits(num, digits, buf, encode_table);
553+
}
554+
}
555+
556+
// >20 digits requires two u128 divisions
557+
unsafe fn encode_impl_over_20_digits(
558+
num: u128,
559+
digits: usize,
560+
buf: &mut [u8],
561+
encode_table: &[u8; 62],
562+
) -> usize {
563+
// input: AABBBBBBBBBBCCCCCCCCCC
564+
//
565+
// (AABBBBBBBBBB, CCCCCCCCCC)
566+
let (num, third_u64) = div_base_to_10(num);
567+
// (AA, BBBBBBBBBB)
568+
let (first_u64, second_u64) = div_base_to_10(num);
569+
// AA - no more than two digits, as num was at most 22 digits
570+
let first_u64 = first_u64 as u64;
571+
572+
// encode the first one or two digits
573+
if digits == 21 {
574+
*buf.get_unchecked_mut(0) = *encode_table.get_unchecked(first_u64 as usize);
575+
} else {
576+
let second_digit = first_u64 % BASE;
577+
let first_digit = first_u64 / BASE;
578+
*buf.get_unchecked_mut(1) = *encode_table.get_unchecked(second_digit as usize);
579+
*buf.get_unchecked_mut(0) = *encode_table.get_unchecked(first_digit as usize);
580+
}
581+
582+
// encode the last 20 digits
583+
encode_impl_20_digits(
584+
second_u64,
585+
third_u64,
586+
&mut buf[(digits - 20)..],
587+
encode_table,
588+
);
589+
590+
digits
591+
}
592+
593+
// 20 digits need only u64 or u32 divisions and can be vectorised (four u32 divisions at once)
594+
unsafe fn encode_impl_20_digits(
595+
first_u64: u64,
596+
second_u64: u64,
597+
buf: &mut [u8],
598+
encode_table: &[u8; 62],
599+
) -> usize {
600+
let first_u32 = (first_u64 / BASE_TO_5) as u32;
601+
let second_u32 = (first_u64 % BASE_TO_5) as u32;
602+
let third_u32 = (second_u64 / BASE_TO_5) as u32;
603+
let fourth_u32 = (second_u64 % BASE_TO_5) as u32;
604+
605+
// [AAAAA, BBBBB, CCCCC, DDDDD]
606+
let mut nums = [first_u32, second_u32, third_u32, fourth_u32];
607+
const STARTING_WRITE_IDXS: [usize; 4] = [5, 10, 15, 20];
608+
609+
for i in 0..5 {
610+
nums.iter_mut()
611+
.zip(STARTING_WRITE_IDXS)
612+
.for_each(|(num, starting_write_idx)| {
613+
let quotient = num.wrapping_div(BASE as u32);
614+
let remainder = (*num - (BASE as u32) * quotient) as usize;
615+
*num = quotient;
616+
617+
*buf.get_unchecked_mut(starting_write_idx - i - 1) =
618+
*encode_table.get_unchecked(remainder)
619+
});
541620
}
542621

622+
20
623+
}
624+
625+
// 11-19 digit implementation needs only one u128 division, then a u64 division per digit
626+
unsafe fn encode_impl_over_10_under_20_digits(
627+
num: u128,
628+
digits: usize,
629+
buf: &mut [u8],
630+
encode_table: &[u8; 62],
631+
) -> usize {
543632
let mut write_idx = digits;
544633
let mut digit_index = 0_usize;
545634

546-
let (mut num, mut u64_num) = div_base_to_10(num);
635+
let (first_u64, mut num) = div_base_to_10(num);
636+
// as this number is <20 digits, once we remove the rightmost 10 digits, what is left (the quotient) fits in a u64.
637+
let first_u64 = first_u64 as u64;
547638

548639
while digit_index < digits {
549640
write_idx = write_idx.wrapping_sub(1);
550641

551-
let quotient = u64_num / BASE;
552-
let remainder = u64_num - quotient * BASE;
642+
let remainder = num % BASE;
643+
num /= BASE;
553644

554645
*buf.get_unchecked_mut(write_idx) = *encode_table.get_unchecked(remainder as usize);
555646

556647
digit_index = digit_index.wrapping_add(1);
557-
match digit_index {
558-
10 => {
559-
(num, u64_num) = div_base_to_10(num);
560-
}
561-
20 => u64_num = num as u64,
562-
_ => u64_num = quotient,
648+
if digit_index == 10 {
649+
num = first_u64
563650
}
564651
}
565652

566653
digits
567654
}
568655

656+
// u64 implementation can avoid any u128 operations
569657
unsafe fn encode_impl_u64(
570-
mut u64_num: u64,
658+
num: u64,
659+
digits: usize,
660+
buf: &mut [u8],
661+
encode_table: &[u8; 62],
662+
) -> usize {
663+
if digits == 11 {
664+
// ABBBBBBBBBB
665+
666+
// A
667+
let first_u64 = num / (BASE_TO_10 as u64);
668+
// BBBBBBBBBB
669+
let second_u64 = num % (BASE_TO_10 as u64);
670+
671+
*buf.get_unchecked_mut(0) = *encode_table.get_unchecked(first_u64 as usize);
672+
673+
encode_impl_u64_10_digits(second_u64, &mut buf[1..], encode_table);
674+
digits
675+
} else if digits == 10 {
676+
return encode_impl_u64_10_digits(num, buf, encode_table);
677+
} else {
678+
return encode_impl_u64_under_10_digits(num, digits, buf, encode_table);
679+
}
680+
}
681+
682+
unsafe fn encode_impl_u64_under_10_digits(
683+
mut num: u64,
571684
digits: usize,
572685
buf: &mut [u8],
573686
encode_table: &[u8; 62],
@@ -578,8 +691,8 @@ unsafe fn encode_impl_u64(
578691
while digit_index < digits {
579692
write_idx = write_idx.wrapping_sub(1);
580693

581-
let remainder = u64_num % BASE;
582-
u64_num /= BASE;
694+
let remainder = num % BASE;
695+
num /= BASE;
583696

584697
*buf.get_unchecked_mut(write_idx) = *encode_table.get_unchecked(remainder as usize);
585698

@@ -589,6 +702,30 @@ unsafe fn encode_impl_u64(
589702
digits
590703
}
591704

705+
unsafe fn encode_impl_u64_10_digits(num: u64, buf: &mut [u8], encode_table: &[u8; 62]) -> usize {
706+
let first_u32 = (num / BASE_TO_5) as u32;
707+
let second_u32 = (num % BASE_TO_5) as u32;
708+
709+
// [AAAAA, BBBBB]
710+
let mut nums = [first_u32, second_u32];
711+
const STARTING_WRITE_IDXS: [usize; 2] = [5, 10];
712+
713+
for i in 0..5 {
714+
nums.iter_mut()
715+
.zip(STARTING_WRITE_IDXS)
716+
.for_each(|(num, starting_write_idx)| {
717+
let quotient = num.wrapping_div(BASE as u32);
718+
let remainder = (*num - (BASE as u32) * quotient) as usize;
719+
*num = quotient;
720+
721+
*buf.get_unchecked_mut(starting_write_idx - i - 1) =
722+
*encode_table.get_unchecked(remainder)
723+
});
724+
}
725+
726+
10
727+
}
728+
592729
fn div_base_to_10(num: u128) -> (u128, u64) {
593730
let quotient = mulh(DIV_BASE_TO_10_MULTIPLY, num) >> DIV_BASE_TO_10_SHIFT;
594731
let remainder = num - BASE_TO_10 * quotient;

0 commit comments

Comments
 (0)