@@ -4495,6 +4495,128 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
44954495 }
44964496 }
44974497 }
4498+ Opcode :: ExtendedPairwiseAddSigned | Opcode :: ExtendedPairwiseAddUnsigned => {
4499+ // Extended pairwise addition instructions compute extended sums within adjacent
4500+ // pairs of lanes of a SIMD vector, producing a SIMD vector with half as many lanes.
4501+ // The instruction sequences are taken from the SIMD spec PR: https://github.com/WebAssembly/simd/pull/380
4502+ /*
4503+ let input_ty = ctx.input_ty(insn, 0);
4504+ let output_ty = ctx.output_ty(insn, 0);
4505+ let src = put_input_in_reg(ctx, inputs[0]);
4506+ let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
4507+ unreachable!();
4508+ match op {
4509+ Opcode::ExtendedPairwiseAddSigned => match (input_ty, output_ty) {
4510+ (types::I8X16, types::I16X8) => {
4511+ static MUL_CONST: [u8; 16] = [0x01; 16];
4512+ let mul_const = ctx.use_constant(VCodeConstantData::WellKnown(&MUL_CONST));
4513+ let mul_const_reg = ctx.alloc_tmp(types::I8X16).only_reg().unwrap();
4514+ ctx.emit(Inst::xmm_load_const(mul_const, mul_const_reg, types::I8X16));
4515+ ctx.emit(Inst::xmm_mov(
4516+ SseOpcode::Movdqa,
4517+ RegMem::reg(mul_const_reg.to_reg()),
4518+ dst,
4519+ ));
4520+ ctx.emit(Inst::xmm_rm_r(SseOpcode::Pmaddubsw, RegMem::reg(src), dst));
4521+ }
4522+ (types::I16X8, types::I32X4) => {
4523+ static MUL_CONST: [u8; 16] = [
4524+ 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00,
4525+ 0x01, 0x00, 0x01, 0x00,
4526+ ];
4527+ let mul_const = ctx.use_constant(VCodeConstantData::WellKnown(&MUL_CONST));
4528+ let mul_const_reg = ctx.alloc_tmp(types::I16X8).only_reg().unwrap();
4529+ ctx.emit(Inst::xmm_load_const(mul_const, mul_const_reg, types::I16X8));
4530+ ctx.emit(Inst::xmm_mov(SseOpcode::Movdqa, RegMem::reg(src), dst));
4531+ ctx.emit(Inst::xmm_rm_r(
4532+ SseOpcode::Pmaddwd,
4533+ RegMem::reg(mul_const_reg.to_reg()),
4534+ dst,
4535+ ));
4536+ }
4537+ _ => unreachable!(
4538+ "Type pattern not supported {:?}-{:?} not supported for {:?}.",
4539+ input_ty, output_ty, op
4540+ ),
4541+ },
4542+ Opcode::ExtendedPairwiseAddUnsigned => match (input_ty, output_ty) {
4543+ (types::I8X16, types::I16X8) => {
4544+ static MUL_CONST: [u8; 16] = [0x01; 16];
4545+ let mul_const = ctx.use_constant(VCodeConstantData::WellKnown(&MUL_CONST));
4546+ let mul_const_reg = ctx.alloc_tmp(types::I8X16).only_reg().unwrap();
4547+ ctx.emit(Inst::xmm_load_const(mul_const, mul_const_reg, types::I8X16));
4548+ ctx.emit(Inst::xmm_mov(SseOpcode::Movdqa, RegMem::reg(src), dst));
4549+ ctx.emit(Inst::xmm_rm_r(
4550+ SseOpcode::Pmaddubsw,
4551+ RegMem::reg(mul_const_reg.to_reg()),
4552+ dst,
4553+ ));
4554+ }
4555+ (types::I16X8, types::I32X4) => {
4556+ static PXOR_CONST: [u8; 16] = [
4557+ 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80,
4558+ 0x00, 0x80, 0x00, 0x80,
4559+ ];
4560+ let pxor_const =
4561+ ctx.use_constant(VCodeConstantData::WellKnown(&PXOR_CONST));
4562+ let pxor_const_reg = ctx.alloc_tmp(types::I16X8).only_reg().unwrap();
4563+ ctx.emit(Inst::xmm_load_const(
4564+ pxor_const,
4565+ pxor_const_reg,
4566+ types::I16X8,
4567+ ));
4568+ ctx.emit(Inst::xmm_mov(SseOpcode::Movdqa, RegMem::reg(src), dst));
4569+ ctx.emit(Inst::xmm_rm_r(
4570+ SseOpcode::Pxor,
4571+ RegMem::reg(pxor_const_reg.to_reg()),
4572+ dst,
4573+ ));
4574+
4575+ static MADD_CONST: [u8; 16] = [
4576+ 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00,
4577+ 0x01, 0x00, 0x01, 0x00,
4578+ ];
4579+ let madd_const =
4580+ ctx.use_constant(VCodeConstantData::WellKnown(&MADD_CONST));
4581+ let madd_const_reg = ctx.alloc_tmp(types::I8X16).only_reg().unwrap();
4582+ ctx.emit(Inst::xmm_load_const(
4583+ madd_const,
4584+ madd_const_reg,
4585+ types::I16X8,
4586+ ));
4587+ ctx.emit(Inst::xmm_rm_r(
4588+ SseOpcode::Pmaddwd,
4589+ RegMem::reg(madd_const_reg.to_reg()),
4590+ dst,
4591+ ));
4592+
4593+ static ADDD_CONST2: [u8; 16] = [
4594+ 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00,
4595+ 0x00, 0x00, 0x01, 0x00,
4596+ ];
4597+ let addd_const2 =
4598+ ctx.use_constant(VCodeConstantData::WellKnown(&ADDD_CONST2));
4599+ let addd_const2_reg = ctx.alloc_tmp(types::I8X16).only_reg().unwrap();
4600+ ctx.emit(Inst::xmm_load_const(
4601+ addd_const2,
4602+ addd_const2_reg,
4603+ types::I16X8,
4604+ ));
4605+ ctx.emit(Inst::xmm_rm_r(
4606+ SseOpcode::Paddd,
4607+ RegMem::reg(addd_const2_reg.to_reg()),
4608+ dst,
4609+ ));
4610+ }
4611+ _ => unreachable!(
4612+ "Type pattern not supported {:?}-{:?} not supported for {:?}.",
4613+ input_ty, output_ty, op
4614+ ),
4615+ },
4616+ _ => unreachable!("{:?} not supported.", op),
4617+ }
4618+ */
4619+ }
44984620 Opcode :: UwidenHigh | Opcode :: UwidenLow | Opcode :: SwidenHigh | Opcode :: SwidenLow => {
44994621 let input_ty = ctx. input_ty ( insn, 0 ) ;
45004622 let output_ty = ctx. output_ty ( insn, 0 ) ;
0 commit comments