ALSA: emu10k1: fix PCM playback cache and interrupt handling
authorOswald Buddenhagen <oswald.buddenhagen@gmx.de>
Wed, 17 May 2023 17:42:52 +0000 (19:42 +0200)
committerTakashi Iwai <tiwai@suse.de>
Thu, 18 May 2023 05:30:17 +0000 (07:30 +0200)
The cache causes a fixed delay regardless of stream parameters.
Consequently, all that "cache invalidate size" calculation stuff was
garbage (which can be traced right back to Creative's OSS driver).

This also removes the definitions of registers CD1..CDF, because they
are accessed only relative to CD0 anyway.

Signed-off-by: Oswald Buddenhagen <oswald.buddenhagen@gmx.de>
Link: https://lore.kernel.org/r/20230517174256.3657060-5-oswald.buddenhagen@gmx.de
Signed-off-by: Takashi Iwai <tiwai@suse.de>
include/sound/emu10k1.h
sound/pci/emu10k1/emupcm.c

index 2d64f07..ee662a1 100644 (file)
 #define IPR_MIDITRANSBUFEMPTY  0x00000100      /* MIDI UART transmit buffer empty              */
 #define IPR_MIDIRECVBUFEMPTY   0x00000080      /* MIDI UART receive buffer empty               */
 #define IPR_CHANNELLOOP                0x00000040      /* Channel (half) loop interrupt(s) pending     */
+                                               /* The interrupt is triggered shortly after     */
+                                               /* CCR_READADDRESS has crossed the boundary;    */
+                                               /* due to the cache, this runs ahead of the     */
+                                               /* actual playback position.                    */
 #define IPR_CHANNELNUMBERMASK  0x0000003f      /* When IPR_CHANNELLOOP is set, indicates the   */
                                                /* highest set channel in CLIPL, CLIPH, HLIPL,  */
                                                /* or HLIPH.  When IPR is written with CL set,  */
@@ -586,24 +590,22 @@ SUB_REG(PEFE, FILTERAMOUNT,       0x000000ff)     /* Filter envlope amount                                */
 
 /* 0x1f: not used */
 
-#define CD0                    0x20            /* Cache data 0 register                                */
-#define CD1                    0x21            /* Cache data 1 register                                */
-#define CD2                    0x22            /* Cache data 2 register                                */
-#define CD3                    0x23            /* Cache data 3 register                                */
-#define CD4                    0x24            /* Cache data 4 register                                */
-#define CD5                    0x25            /* Cache data 5 register                                */
-#define CD6                    0x26            /* Cache data 6 register                                */
-#define CD7                    0x27            /* Cache data 7 register                                */
-#define CD8                    0x28            /* Cache data 8 register                                */
-#define CD9                    0x29            /* Cache data 9 register                                */
-#define CDA                    0x2a            /* Cache data A register                                */
-#define CDB                    0x2b            /* Cache data B register                                */
-#define CDC                    0x2c            /* Cache data C register                                */
-#define CDD                    0x2d            /* Cache data D register                                */
-#define CDE                    0x2e            /* Cache data E register                                */
-#define CDF                    0x2f            /* Cache data F register                                */
-
-/* 0x30-3f seem to be the same as 0x20-2f */
+// 32 cache registers (== 128 bytes) per channel follow.
+// In stereo mode, the two channels' caches are concatenated into one,
+// and hold the interleaved frames.
+// The cache holds 64 frames, so the upper half is not used in 8-bit mode.
+// All registers mentioned below count in frames.
+// The cache is a ring buffer; CCR_READADDRESS operates modulo 64.
+// The cache is filled from (CCCA_CURRADDR - CCR_CACHEINVALIDSIZE)
+// into (CCR_READADDRESS - CCR_CACHEINVALIDSIZE).
+// The engine has a fetch threshold of 32 bytes, so it tries to keep
+// CCR_CACHEINVALIDSIZE below 8 (16-bit stereo), 16 (16-bit mono,
+// 8-bit stereo), or 32 (8-bit mono). The actual transfers are pretty
+// unpredictable, especially if several voices are running.
+// Frames are consumed at CCR_READADDRESS, which is incremented afterwards,
+// along with CCCA_CURRADDR and CCR_CACHEINVALIDSIZE. This implies that the
+// actual playback position always lags CCCA_CURRADDR by exactly 64 frames.
+#define CD0                    0x20            /* Cache data registers 0 .. 0x1f                       */
 
 #define PTB                    0x40            /* Page table base register                             */
 #define PTB_MASK               0xfffff000      /* Physical address of the page table in host memory    */
index a6c4f18..feb5759 100644 (file)
@@ -112,6 +112,10 @@ static int snd_emu10k1_pcm_channel_alloc(struct snd_emu10k1_pcm * epcm, int voic
                }
        }
        if (epcm->extra == NULL) {
+               // The hardware supports only (half-)loop interrupts, so to support an
+               // arbitrary number of periods per buffer, we use an extra voice with a
+               // period-sized loop as the interrupt source. Additionally, the interrupt
+               // timing of the hardware is "suboptimal" and needs some compensation.
                err = snd_emu10k1_voice_alloc(epcm->emu,
                                              epcm->type == PLAYBACK_EMUVOICE ? EMU10K1_PCM : EMU10K1_EFX,
                                              1,
@@ -232,23 +236,6 @@ static unsigned int emu10k1_select_interprom(unsigned int pitch_target)
                return CCCA_INTERPROM_2;
 }
 
-/*
- * calculate cache invalidate size 
- *
- * stereo: channel is stereo
- * w_16: using 16bit samples
- *
- * returns: cache invalidate size in samples
- */
-static inline int emu10k1_ccis(int stereo, int w_16)
-{
-       if (w_16) {
-               return stereo ? 24 : 26;
-       } else {
-               return stereo ? 24*2 : 26*2;
-       }
-}
-
 static void snd_emu10k1_pcm_init_voice(struct snd_emu10k1 *emu,
                                       int master, int extra,
                                       struct snd_emu10k1_voice *evoice,
@@ -264,7 +251,6 @@ static void snd_emu10k1_pcm_init_voice(struct snd_emu10k1 *emu,
        unsigned char send_routing[8];
        unsigned long flags;
        unsigned int pitch_target;
-       unsigned int ccis;
 
        voice = evoice->number;
        stereo = runtime->channels == 2;
@@ -284,10 +270,8 @@ static void snd_emu10k1_pcm_init_voice(struct snd_emu10k1 *emu,
                memcpy(send_amount, &mix->send_volume[tmp][0], 8);
        }
 
-       ccis = emu10k1_ccis(stereo, w_16);
-       
        if (master) {
-               evoice->epcm->ccca_start_addr = start_addr + ccis;
+               evoice->epcm->ccca_start_addr = start_addr + 64 - 3;
        }
        if (stereo && !extra) {
                // Not really necessary for the slave, but it doesn't hurt
@@ -317,11 +301,11 @@ static void snd_emu10k1_pcm_init_voice(struct snd_emu10k1 *emu,
        else 
                pitch_target = emu10k1_calc_pitch_target(runtime->rate);
        if (extra)
-               snd_emu10k1_ptr_write(emu, CCCA, voice, start_addr |
+               snd_emu10k1_ptr_write(emu, CCCA, voice, (end_addr - 3) |
                              emu10k1_select_interprom(pitch_target) |
                              (w_16 ? 0 : CCCA_8BITSELECT));
        else
-               snd_emu10k1_ptr_write(emu, CCCA, voice, (start_addr + ccis) |
+               snd_emu10k1_ptr_write(emu, CCCA, voice, (start_addr + 64 - 3) |
                              emu10k1_select_interprom(pitch_target) |
                              (w_16 ? 0 : CCCA_8BITSELECT));
        /* Clear filter delay memory */
@@ -532,35 +516,30 @@ static void snd_emu10k1_playback_invalidate_cache(struct snd_emu10k1 *emu,
                                                  struct snd_emu10k1_voice *evoice)
 {
        struct snd_pcm_runtime *runtime;
-       unsigned int voice, stereo, i, ccis, cra = 64, cs, sample;
+       unsigned voice, stereo, sample;
+       u32 ccr;
 
        runtime = evoice->epcm->substream->runtime;
        voice = evoice->number;
        stereo = (runtime->channels == 2);
        sample = snd_pcm_format_width(runtime->format) == 16 ? 0 : 0x80808080;
-       ccis = emu10k1_ccis(stereo, sample == 0);
-       /* set cs to 2 * number of cache registers beside the invalidated */
-       cs = (sample == 0) ? (32-ccis) : (64-ccis+1) >> 1;
-       if (cs > 16) cs = 16;
-       for (i = 0; i < cs; i++) {
+
+       // We assume that the cache is resting at this point (i.e.,
+       // CCR_CACHEINVALIDSIZE is very small).
+
+       // Clear leading frames. For simplicitly, this does too much,
+       // except for 16-bit stereo. And the interpolator will actually
+       // access them at all only when we're pitch-shifting.
+       for (int i = 0; i < 3; i++)
                snd_emu10k1_ptr_write(emu, CD0 + i, voice, sample);
-               if (stereo) {
-                       snd_emu10k1_ptr_write(emu, CD0 + i, voice + 1, sample);
-               }
-       }
-       /* reset cache */
-       snd_emu10k1_ptr_write(emu, CCR_CACHEINVALIDSIZE, voice, 0);
-       snd_emu10k1_ptr_write(emu, CCR_READADDRESS, voice, cra);
-       if (stereo) {
-               snd_emu10k1_ptr_write(emu, CCR_CACHEINVALIDSIZE, voice + 1, 0);
-               // The engine goes haywire if this one is out of sync
-               snd_emu10k1_ptr_write(emu, CCR_READADDRESS, voice + 1, cra);
-       }
-       /* fill cache */
-       snd_emu10k1_ptr_write(emu, CCR_CACHEINVALIDSIZE, voice, ccis);
+
+       // Fill cache
+       ccr = (64 - 3) << REG_SHIFT(CCR_CACHEINVALIDSIZE);
        if (stereo) {
-               snd_emu10k1_ptr_write(emu, CCR_CACHEINVALIDSIZE, voice+1, ccis);
+               // The engine goes haywire if CCR_READADDRESS is out of sync
+               snd_emu10k1_ptr_write(emu, CCR, voice + 1, ccr);
        }
+       snd_emu10k1_ptr_write(emu, CCR, voice, ccr);
 }
 
 static void snd_emu10k1_playback_commit_volume(struct snd_emu10k1 *emu,