@@ -4927,6 +4927,128 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
49274927 }
49284928 }
49294929 }
4930+ Opcode :: ExtendedPairwiseAddSigned | Opcode :: ExtendedPairwiseAddUnsigned => {
4931+ // Extended pairwise addition instructions compute extended sums within adjacent
4932+ // pairs of lanes of a SIMD vector, producing a SIMD vector with half as many lanes.
4933+ // The instruction sequences are taken from the spec PR: https://github.com/WebAssembly/simd/pull/380
4934+ /*
4935+ let input_ty = ctx.input_ty(insn, 0);
4936+ let output_ty = ctx.output_ty(insn, 0);
4937+ let src = put_input_in_reg(ctx, inputs[0]);
4938+ let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
4939+ unreachable!();
4940+ match op {
4941+ Opcode::ExtendedPairwiseAddSigned => match (input_ty, output_ty) {
4942+ (types::I8X16, types::I16X8) => {
4943+ static MUL_CONST: [u8; 16] = [0x01; 16];
4944+ let mul_const = ctx.use_constant(VCodeConstantData::WellKnown(&MUL_CONST));
4945+ let mul_const_reg = ctx.alloc_tmp(types::I8X16).only_reg().unwrap();
4946+ ctx.emit(Inst::xmm_load_const(mul_const, mul_const_reg, types::I8X16));
4947+ ctx.emit(Inst::xmm_mov(
4948+ SseOpcode::Movdqa,
4949+ RegMem::reg(mul_const_reg.to_reg()),
4950+ dst,
4951+ ));
4952+ ctx.emit(Inst::xmm_rm_r(SseOpcode::Pmaddubsw, RegMem::reg(src), dst));
4953+ }
4954+ (types::I16X8, types::I32X4) => {
4955+ static MUL_CONST: [u8; 16] = [
4956+ 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00,
4957+ 0x01, 0x00, 0x01, 0x00,
4958+ ];
4959+ let mul_const = ctx.use_constant(VCodeConstantData::WellKnown(&MUL_CONST));
4960+ let mul_const_reg = ctx.alloc_tmp(types::I16X8).only_reg().unwrap();
4961+ ctx.emit(Inst::xmm_load_const(mul_const, mul_const_reg, types::I16X8));
4962+ ctx.emit(Inst::xmm_mov(SseOpcode::Movdqa, RegMem::reg(src), dst));
4963+ ctx.emit(Inst::xmm_rm_r(
4964+ SseOpcode::Pmaddwd,
4965+ RegMem::reg(mul_const_reg.to_reg()),
4966+ dst,
4967+ ));
4968+ }
4969+ _ => unreachable!(
4970+ "Type pattern not supported {:?}-{:?} not supported for {:?}.",
4971+ input_ty, output_ty, op
4972+ ),
4973+ },
4974+ Opcode::ExtendedPairwiseAddUnsigned => match (input_ty, output_ty) {
4975+ (types::I8X16, types::I16X8) => {
4976+ static MUL_CONST: [u8; 16] = [0x01; 16];
4977+ let mul_const = ctx.use_constant(VCodeConstantData::WellKnown(&MUL_CONST));
4978+ let mul_const_reg = ctx.alloc_tmp(types::I8X16).only_reg().unwrap();
4979+ ctx.emit(Inst::xmm_load_const(mul_const, mul_const_reg, types::I8X16));
4980+ ctx.emit(Inst::xmm_mov(SseOpcode::Movdqa, RegMem::reg(src), dst));
4981+ ctx.emit(Inst::xmm_rm_r(
4982+ SseOpcode::Pmaddubsw,
4983+ RegMem::reg(mul_const_reg.to_reg()),
4984+ dst,
4985+ ));
4986+ }
4987+ (types::I16X8, types::I32X4) => {
4988+ static PXOR_CONST: [u8; 16] = [
4989+ 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80,
4990+ 0x00, 0x80, 0x00, 0x80,
4991+ ];
4992+ let pxor_const =
4993+ ctx.use_constant(VCodeConstantData::WellKnown(&PXOR_CONST));
4994+ let pxor_const_reg = ctx.alloc_tmp(types::I16X8).only_reg().unwrap();
4995+ ctx.emit(Inst::xmm_load_const(
4996+ pxor_const,
4997+ pxor_const_reg,
4998+ types::I16X8,
4999+ ));
5000+ ctx.emit(Inst::xmm_mov(SseOpcode::Movdqa, RegMem::reg(src), dst));
5001+ ctx.emit(Inst::xmm_rm_r(
5002+ SseOpcode::Pxor,
5003+ RegMem::reg(pxor_const_reg.to_reg()),
5004+ dst,
5005+ ));
5006+
5007+ static MADD_CONST: [u8; 16] = [
5008+ 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00,
5009+ 0x01, 0x00, 0x01, 0x00,
5010+ ];
5011+ let madd_const =
5012+ ctx.use_constant(VCodeConstantData::WellKnown(&MADD_CONST));
5013+ let madd_const_reg = ctx.alloc_tmp(types::I8X16).only_reg().unwrap();
5014+ ctx.emit(Inst::xmm_load_const(
5015+ madd_const,
5016+ madd_const_reg,
5017+ types::I16X8,
5018+ ));
5019+ ctx.emit(Inst::xmm_rm_r(
5020+ SseOpcode::Pmaddwd,
5021+ RegMem::reg(madd_const_reg.to_reg()),
5022+ dst,
5023+ ));
5024+
5025+ static ADDD_CONST2: [u8; 16] = [
5026+ 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00,
5027+ 0x00, 0x00, 0x01, 0x00,
5028+ ];
5029+ let addd_const2 =
5030+ ctx.use_constant(VCodeConstantData::WellKnown(&ADDD_CONST2));
5031+ let addd_const2_reg = ctx.alloc_tmp(types::I8X16).only_reg().unwrap();
5032+ ctx.emit(Inst::xmm_load_const(
5033+ addd_const2,
5034+ addd_const2_reg,
5035+ types::I16X8,
5036+ ));
5037+ ctx.emit(Inst::xmm_rm_r(
5038+ SseOpcode::Paddd,
5039+ RegMem::reg(addd_const2_reg.to_reg()),
5040+ dst,
5041+ ));
5042+ }
5043+ _ => unreachable!(
5044+ "Type pattern not supported {:?}-{:?} not supported for {:?}.",
5045+ input_ty, output_ty, op
5046+ ),
5047+ },
5048+ _ => unreachable!("{:?} not supported.", op),
5049+ }
5050+ */
5051+ }
49305052 Opcode :: UwidenHigh | Opcode :: UwidenLow | Opcode :: SwidenHigh | Opcode :: SwidenLow => {
49315053 let input_ty = ctx. input_ty ( insn, 0 ) ;
49325054 let output_ty = ctx. output_ty ( insn, 0 ) ;
0 commit comments