@@ -55,6 +55,19 @@ static inline s8 read_s8(u32 addr) { return (s8)_MMU_read08<ARMCPU_ARM7,MMU_AT_D
5555// as this is guaranteed to be safe.
5656#define ENABLE_DUMMY_SPU_CAPTURE 1
5757
58+ // This specifies how many samples to buffer for the channel FIFO
59+ // If this is too low, then badly-synchronized streams can cause
60+ // buffer overrun.
61+ #define SPUCHAN_FIFO_DELAY 3
62+ #if SPUCHAN_FIFO_DELAY >= SPUCHAN_PCM16B_SIZE
63+ # error "Channel FIFO delay must less than SPUCHAN_PCM16B_SIZE"
64+ #endif
65+
66+ // This controls the delay for the capture unit (how many output
67+ // samples to stall for before actually writing anything).
68+ // NOTE: this appears to need to match the channel playback delay (see KeyOn()); unconfirmed.
69+ #define SPUCAPTURE_FIFO_DELAY 3
70+
5871#define K_ADPCM_LOOPING_RECOVERY_INDEX 255
5972
6073#define CATMULLROM_INTERPOLATION_RESOLUTION_BITS 11
@@ -785,7 +798,7 @@ void SPU_struct::ProbeCapture(int which)
785798 cap.runtime .dad = cap.dad ;
786799 u32 len = cap.len ;
787800 if (len==0 ) len=1 ;
788- cap.runtime .sampcntFrac = 0 , cap.runtime .sampcntInt = -SPUCAPTURE_FIFO_SIZE ;
801+ cap.runtime .sampcntFrac = 0 , cap.runtime .sampcntInt = -SPUCAPTURE_FIFO_DELAY ;
789802}
790803
791804void SPU_struct::WriteByte (u32 addr, u8 val)
@@ -1079,10 +1092,10 @@ FORCEINLINE static s16 Interpolate(const s16 *pcm16b, u8 pcm16bOffs, u32 subPos)
10791092 // of a 'luxury' thing, we should be able to use MinMax
10801093 // since if the user is using this interpolation method,
10811094 // there's likely enough processing power to handle it.
1082- s32 a = pcm16b[SPUCHAN_PCM16B_AT (pcm16bOffs - 3 )];
1083- s32 b = pcm16b[SPUCHAN_PCM16B_AT (pcm16bOffs - 2 )];
1084- s32 c = pcm16b[SPUCHAN_PCM16B_AT (pcm16bOffs - 1 )];
1085- s32 d = pcm16b[SPUCHAN_PCM16B_AT (pcm16bOffs - 0 )];
1095+ s32 a = pcm16b[SPUCHAN_PCM16B_AT (pcm16bOffs-SPUCHAN_FIFO_DELAY+ 0 )];
1096+ s32 b = pcm16b[SPUCHAN_PCM16B_AT (pcm16bOffs-SPUCHAN_FIFO_DELAY+ 1 )];
1097+ s32 c = pcm16b[SPUCHAN_PCM16B_AT (pcm16bOffs-SPUCHAN_FIFO_DELAY+ 2 )];
1098+ s32 d = pcm16b[SPUCHAN_PCM16B_AT (pcm16bOffs-SPUCHAN_FIFO_DELAY+ 3 )];
10861099 const u16 *w = catmullrom_lut[subPos >> (32 - CATMULLROM_INTERPOLATION_RESOLUTION_BITS)];
10871100 return (s16)MinMax ((-a*(s32)w[0 ] + b*(s32)w[1 ] + c*(s32)w[2 ] - d*(s32)w[3 ]) >> 15 , -0x8000 , +0x7FFF );
10881101 }
@@ -1096,8 +1109,8 @@ FORCEINLINE static s16 Interpolate(const s16 *pcm16b, u8 pcm16bOffs, u32 subPos)
10961109 // NOTE: Always cast the result to s16. (b-a) can
10971110 // overflow, but a+(b-a)*subPos can't. So we might
10981111 // have garbage in the upper 16 bits.
1099- s32 a = pcm16b[SPUCHAN_PCM16B_AT (pcm16bOffs - 1 )];
1100- s32 b = pcm16b[SPUCHAN_PCM16B_AT (pcm16bOffs - 0 )];
1112+ s32 a = pcm16b[SPUCHAN_PCM16B_AT (pcm16bOffs-SPUCHAN_FIFO_DELAY+ 0 )];
1113+ s32 b = pcm16b[SPUCHAN_PCM16B_AT (pcm16bOffs-SPUCHAN_FIFO_DELAY+ 1 )];
11011114 s32 subPos16 = (s32)cos_lut[subPos >> (32 - COSINE_INTERPOLATION_RESOLUTION_BITS)];
11021115 return (s16)(a + (((b - a)*subPos16) >> 16 ));
11031116 }
@@ -1108,15 +1121,15 @@ FORCEINLINE static s16 Interpolate(const s16 *pcm16b, u8 pcm16bOffs, u32 subPos)
11081121 // sampleI = sampleA * (1 - ratio) + sampleB * ratio
11091122 // Delay: 1 sample, Maximum gain: 1.0
11101123 // NOTE: Always cast the result to s16 (see above).
1111- s32 a = pcm16b[SPUCHAN_PCM16B_AT (pcm16bOffs - 1 )];
1112- s32 b = pcm16b[SPUCHAN_PCM16B_AT (pcm16bOffs - 0 )];
1124+ s32 a = pcm16b[SPUCHAN_PCM16B_AT (pcm16bOffs-SPUCHAN_FIFO_DELAY+ 0 )];
1125+ s32 b = pcm16b[SPUCHAN_PCM16B_AT (pcm16bOffs-SPUCHAN_FIFO_DELAY+ 1 )];
11131126 s32 subPos16 = subPos >> (32 - 16 );
11141127 return (s16)(a + (((b - a)*subPos16) >> 16 ));
11151128 }
11161129
11171130 default :
11181131 // Delay: 0 samples, Maximum gain: 1.0
1119- return pcm16b[SPUCHAN_PCM16B_AT (pcm16bOffs)];
1132+ return pcm16b[SPUCHAN_PCM16B_AT (pcm16bOffs-SPUCHAN_FIFO_DELAY+ 0 )];
11201133 }
11211134}
11221135
@@ -1411,7 +1424,7 @@ template<int CAP_BITS, bool USE_SRCBUF>
14111424 s32 pos = runtime.sampcntInt ;
14121425 do
14131426 {
1414- s16 sample = USE_SRCBUF ? (*srcBuf) : 0 ;
1427+ s16 *data = &runtime. pcm16b [ SPUCAPTURE_PCM16B_AT (runtime. pcm16bOffs )] ;
14151428 u32 nSamplesToProcess = srcChan.sampincInt + AddAndReturnCarry (&runtime.sampcntFrac , srcChan.sampincFrac );
14161429 while (nSamplesToProcess--)
14171430 {
@@ -1425,22 +1438,22 @@ template<int CAP_BITS, bool USE_SRCBUF>
14251438 pos -= capLen_shifted;
14261439 }
14271440
1428- s16 *data = &runtime.pcm16b [SPUCAPTURE_PCM16B_AT (runtime.pcm16bOffs )];
14291441 if (pos >= 0 )
14301442 {
1443+ s16 sample = *data;
14311444 if (CAP_BITS == 8 )
14321445 {
1433- _MMU_write08<ARMCPU_ARM7,MMU_AT_DMA>(runtime.dad + pos*sizeof (s8), (u8 )(*data >> 8 ));
1446+ _MMU_write08<ARMCPU_ARM7,MMU_AT_DMA>(runtime.dad + pos*sizeof (s8), (u8 )(sample >> 8 ));
14341447 }
14351448 else
14361449 {
1437- _MMU_write16<ARMCPU_ARM7,MMU_AT_DMA>(runtime.dad + pos*sizeof (s16), (u16 )(*data ));
1450+ _MMU_write16<ARMCPU_ARM7,MMU_AT_DMA>(runtime.dad + pos*sizeof (s16), (u16 )(sample ));
14381451 }
14391452 }
1440- *data = sample;
1441- runtime.pcm16bOffs ++;
14421453 pos++;
14431454 }
1455+ *data = USE_SRCBUF ? (*srcBuf) : 0 ;
1456+ runtime.pcm16bOffs ++;
14441457
14451458 // srcBuf[] stores two samples per time unit
14461459 // Either {Ch0[+Ch1],Ch2[+Ch3]}, or {LMix,RMix}
0 commit comments