* @regs: base address of register map
* @clk: core clock, divided to calculate serial clock
* @irq: interrupt, signals TX FIFO empty or RX FIFO ¾ full
+ * @tfr: SPI transfer currently processed
* @tx_buf: pointer whence next transmitted byte is read
* @rx_buf: pointer where next received byte is written
* @tx_len: remaining bytes to transmit
* @rx_len: remaining bytes to receive
+ * @tx_prologue: bytes transmitted without DMA if first TX sglist entry's
+ * length is not a multiple of 4 (to overcome hardware limitation)
+ * @rx_prologue: bytes received without DMA if first RX sglist entry's
+ * length is not a multiple of 4 (to overcome hardware limitation)
+ * @tx_spillover: whether @tx_prologue spills over to second TX sglist entry
* @dma_pending: whether a DMA transfer is in progress
*/
struct bcm2835_spi {
void __iomem *regs;
struct clk *clk;
int irq;
+ struct spi_transfer *tfr;
const u8 *tx_buf;
u8 *rx_buf;
int tx_len;
int rx_len;
+ int tx_prologue;
+ int rx_prologue;
+ bool tx_spillover;
bool dma_pending;
};
}
}
+/**
+ * bcm2835_rd_fifo_count() - blindly read exactly @count bytes from RX FIFO
+ * @bs: BCM2835 SPI controller
+ * @count: bytes to read from RX FIFO
+ *
+ * The caller must ensure that @bs->rx_len is greater than or equal to @count,
+ * that the RX FIFO contains at least @count bytes and that the DMA Enable flag
+ * in the CS register is set (such that a read from the FIFO register receives
+ * 32-bit instead of just 8-bit).
+ */
+static inline void bcm2835_rd_fifo_count(struct bcm2835_spi *bs, int count)
+{
+ u32 val;
+
+ bs->rx_len -= count;
+
+ while (count > 0) {
+ val = bcm2835_rd(bs, BCM2835_SPI_FIFO);
+ if (bs->rx_buf) {
+ int len = min(count, 4);
+ memcpy(bs->rx_buf, &val, len);
+ bs->rx_buf += len;
+ }
+ count -= 4;
+ }
+}
+
+/**
+ * bcm2835_wr_fifo_count() - blindly write exactly @count bytes to TX FIFO
+ * @bs: BCM2835 SPI controller
+ * @count: bytes to write to TX FIFO
+ *
+ * The caller must ensure that @bs->tx_len is greater than or equal to @count,
+ * that the TX FIFO can accommodate @count bytes and that the DMA Enable flag
+ * in the CS register is set (such that a write to the FIFO register transmits
+ * 32-bit instead of just 8-bit).
+ */
+static inline void bcm2835_wr_fifo_count(struct bcm2835_spi *bs, int count)
+{
+ u32 val;
+
+ bs->tx_len -= count;
+
+ while (count > 0) {
+ if (bs->tx_buf) {
+ int len = min(count, 4);
+ memcpy(&val, bs->tx_buf, len);
+ bs->tx_buf += len;
+ } else {
+ val = 0;
+ }
+ bcm2835_wr(bs, BCM2835_SPI_FIFO, val);
+ count -= 4;
+ }
+}
+
+/**
+ * bcm2835_wait_tx_fifo_empty() - busy-wait for TX FIFO to empty
+ * @bs: BCM2835 SPI controller
+ */
+static inline void bcm2835_wait_tx_fifo_empty(struct bcm2835_spi *bs)
+{
+ while (!(bcm2835_rd(bs, BCM2835_SPI_CS) & BCM2835_SPI_CS_DONE))
+ cpu_relax();
+}
+
static void bcm2835_spi_reset_hw(struct spi_master *master)
{
struct bcm2835_spi *bs = spi_master_get_devdata(master);
* the main one being that DMA transfers are limited to 16 bit
* (so 0 to 65535 bytes) by the SPI HW due to BCM2835_SPI_DLEN
*
- * also we currently assume that the scatter-gather fragments are
- * all multiple of 4 (except the last) - otherwise we would need
- * to reset the FIFO before subsequent transfers...
- * this also means that tx/rx transfers sg's need to be of equal size!
- *
* there may be a few more border-cases we may need to address as well
* but unfortunately this would mean splitting up the scatter-gather
* list making it slightly unpractical...
*/
+
+/**
+ * bcm2835_spi_transfer_prologue() - transfer first few bytes without DMA
+ * @master: SPI master
+ * @tfr: SPI transfer
+ * @bs: BCM2835 SPI controller
+ * @cs: CS register
+ *
+ * A limitation in DMA mode is that the FIFO must be accessed in 4 byte chunks.
+ * Only the final write access is permitted to transmit less than 4 bytes, the
+ * SPI controller deduces its intended size from the DLEN register.
+ *
+ * If a TX or RX sglist contains multiple entries, one per page, and the first
+ * entry starts in the middle of a page, that first entry's length may not be
+ * a multiple of 4. Subsequent entries are fine because they span an entire
+ * page, hence do have a length that's a multiple of 4.
+ *
+ * This cannot happen with kmalloc'ed buffers (which is what most clients use)
+ * because they are contiguous in physical memory and therefore not split on
+ * page boundaries by spi_map_buf(). But it *can* happen with vmalloc'ed
+ * buffers.
+ *
+ * The DMA engine is incapable of combining sglist entries into a continuous
+ * stream of 4 byte chunks, it treats every entry separately: A TX entry is
+ * rounded up a to a multiple of 4 bytes by transmitting surplus bytes, an RX
+ * entry is rounded up by throwing away received bytes.
+ *
+ * Overcome this limitation by transferring the first few bytes without DMA:
+ * E.g. if the first TX sglist entry's length is 23 and the first RX's is 42,
+ * write 3 bytes to the TX FIFO but read only 2 bytes from the RX FIFO.
+ * The residue of 1 byte in the RX FIFO is picked up by DMA. Together with
+ * the rest of the first RX sglist entry it makes up a multiple of 4 bytes.
+ *
+ * Should the RX prologue be larger, say, 3 vis-à-vis a TX prologue of 1,
+ * write 1 + 4 = 5 bytes to the TX FIFO and read 3 bytes from the RX FIFO.
+ * Caution, the additional 4 bytes spill over to the second TX sglist entry
+ * if the length of the first is *exactly* 1.
+ *
+ * At most 6 bytes are written and at most 3 bytes read. Do we know the
+ * transfer has this many bytes? Yes, see BCM2835_SPI_DMA_MIN_LENGTH.
+ *
+ * The FIFO is normally accessed with 8-bit width by the CPU and 32-bit width
+ * by the DMA engine. Toggling the DMA Enable flag in the CS register switches
+ * the width but also garbles the FIFO's contents. The prologue must therefore
+ * be transmitted in 32-bit width to ensure that the following DMA transfer can
+ * pick up the residue in the RX FIFO in ungarbled form.
+ */
+static void bcm2835_spi_transfer_prologue(struct spi_master *master,
+ struct spi_transfer *tfr,
+ struct bcm2835_spi *bs,
+ u32 cs)
+{
+ int tx_remaining;
+
+ bs->tfr = tfr;
+ bs->tx_prologue = 0;
+ bs->rx_prologue = 0;
+ bs->tx_spillover = false;
+
+ if (!sg_is_last(&tfr->tx_sg.sgl[0]))
+ bs->tx_prologue = sg_dma_len(&tfr->tx_sg.sgl[0]) & 3;
+
+ if (!sg_is_last(&tfr->rx_sg.sgl[0])) {
+ bs->rx_prologue = sg_dma_len(&tfr->rx_sg.sgl[0]) & 3;
+
+ if (bs->rx_prologue > bs->tx_prologue) {
+ if (sg_is_last(&tfr->tx_sg.sgl[0])) {
+ bs->tx_prologue = bs->rx_prologue;
+ } else {
+ bs->tx_prologue += 4;
+ bs->tx_spillover =
+ !(sg_dma_len(&tfr->tx_sg.sgl[0]) & ~3);
+ }
+ }
+ }
+
+ /* rx_prologue > 0 implies tx_prologue > 0, so check only the latter */
+ if (!bs->tx_prologue)
+ return;
+
+ /* Write and read RX prologue. Adjust first entry in RX sglist. */
+ if (bs->rx_prologue) {
+ bcm2835_wr(bs, BCM2835_SPI_DLEN, bs->rx_prologue);
+ bcm2835_wr(bs, BCM2835_SPI_CS, cs | BCM2835_SPI_CS_TA
+ | BCM2835_SPI_CS_DMAEN);
+ bcm2835_wr_fifo_count(bs, bs->rx_prologue);
+ bcm2835_wait_tx_fifo_empty(bs);
+ bcm2835_rd_fifo_count(bs, bs->rx_prologue);
+ bcm2835_spi_reset_hw(master);
+
+ dma_sync_sg_for_device(master->dma_rx->device->dev,
+ tfr->rx_sg.sgl, 1, DMA_FROM_DEVICE);
+
+ tfr->rx_sg.sgl[0].dma_address += bs->rx_prologue;
+ tfr->rx_sg.sgl[0].length -= bs->rx_prologue;
+ }
+
+ /*
+ * Write remaining TX prologue. Adjust first entry in TX sglist.
+ * Also adjust second entry if prologue spills over to it.
+ */
+ tx_remaining = bs->tx_prologue - bs->rx_prologue;
+ if (tx_remaining) {
+ bcm2835_wr(bs, BCM2835_SPI_DLEN, tx_remaining);
+ bcm2835_wr(bs, BCM2835_SPI_CS, cs | BCM2835_SPI_CS_TA
+ | BCM2835_SPI_CS_DMAEN);
+ bcm2835_wr_fifo_count(bs, tx_remaining);
+ bcm2835_wait_tx_fifo_empty(bs);
+ bcm2835_wr(bs, BCM2835_SPI_CS, cs | BCM2835_SPI_CS_CLEAR_TX);
+ }
+
+ if (likely(!bs->tx_spillover)) {
+ tfr->tx_sg.sgl[0].dma_address += bs->tx_prologue;
+ tfr->tx_sg.sgl[0].length -= bs->tx_prologue;
+ } else {
+ tfr->tx_sg.sgl[0].length = 0;
+ tfr->tx_sg.sgl[1].dma_address += 4;
+ tfr->tx_sg.sgl[1].length -= 4;
+ }
+}
+
+/**
+ * bcm2835_spi_undo_prologue() - reconstruct original sglist state
+ * @bs: BCM2835 SPI controller
+ *
+ * Undo changes which were made to an SPI transfer's sglist when transmitting
+ * the prologue. This is necessary to ensure the same memory ranges are
+ * unmapped that were originally mapped.
+ */
+static void bcm2835_spi_undo_prologue(struct bcm2835_spi *bs)
+{
+ struct spi_transfer *tfr = bs->tfr;
+
+ if (!bs->tx_prologue)
+ return;
+
+ if (bs->rx_prologue) {
+ tfr->rx_sg.sgl[0].dma_address -= bs->rx_prologue;
+ tfr->rx_sg.sgl[0].length += bs->rx_prologue;
+ }
+
+ if (likely(!bs->tx_spillover)) {
+ tfr->tx_sg.sgl[0].dma_address -= bs->tx_prologue;
+ tfr->tx_sg.sgl[0].length += bs->tx_prologue;
+ } else {
+ tfr->tx_sg.sgl[0].length = bs->tx_prologue - 4;
+ tfr->tx_sg.sgl[1].dma_address -= 4;
+ tfr->tx_sg.sgl[1].length += 4;
+ }
+}
+
static void bcm2835_spi_dma_done(void *data)
{
struct spi_master *master = data;
*/
if (cmpxchg(&bs->dma_pending, true, false)) {
dmaengine_terminate_all(master->dma_tx);
+ bcm2835_spi_undo_prologue(bs);
}
/* and mark as completed */;
return dma_submit_error(cookie);
}
-static inline int bcm2835_check_sg_length(struct sg_table *sgt)
-{
- int i;
- struct scatterlist *sgl;
-
- /* check that the sg entries are word-sized (except for last) */
- for_each_sg(sgt->sgl, sgl, (int)sgt->nents - 1, i) {
- if (sg_dma_len(sgl) % 4)
- return -EFAULT;
- }
-
- return 0;
-}
-
static int bcm2835_spi_transfer_one_dma(struct spi_master *master,
struct spi_device *spi,
struct spi_transfer *tfr,
struct bcm2835_spi *bs = spi_master_get_devdata(master);
int ret;
- /* check that the scatter gather segments are all a multiple of 4 */
- if (bcm2835_check_sg_length(&tfr->tx_sg) ||
- bcm2835_check_sg_length(&tfr->rx_sg)) {
- dev_warn_once(&spi->dev,
- "scatter gather segment length is not a multiple of 4 - falling back to interrupt mode\n");
- return bcm2835_spi_transfer_one_irq(master, spi, tfr, cs);
- }
+ /*
+ * Transfer first few bytes without DMA if length of first TX or RX
+ * sglist entry is not a multiple of 4 bytes (hardware limitation).
+ */
+ bcm2835_spi_transfer_prologue(master, tfr, bs, cs);
/* setup tx-DMA */
ret = bcm2835_spi_prepare_sg(master, tfr, true);
if (ret)
- return ret;
+ goto err_reset_hw;
/* start TX early */
dma_async_issue_pending(master->dma_tx);
bs->dma_pending = 1;
/* set the DMA length */
- bcm2835_wr(bs, BCM2835_SPI_DLEN, tfr->len);
+ bcm2835_wr(bs, BCM2835_SPI_DLEN, bs->tx_len);
/* start the HW */
bcm2835_wr(bs, BCM2835_SPI_CS,
/* need to reset on errors */
dmaengine_terminate_all(master->dma_tx);
bs->dma_pending = false;
- bcm2835_spi_reset_hw(master);
- return ret;
+ goto err_reset_hw;
}
/* start rx dma late */
/* wait for wakeup in framework */
return 1;
+
+err_reset_hw:
+ bcm2835_spi_reset_hw(master);
+ bcm2835_spi_undo_prologue(bs);
+ return ret;
}
static bool bcm2835_spi_can_dma(struct spi_master *master,
return false;
}
- /* if we run rx/tx_buf with word aligned addresses then we are OK */
- if ((((size_t)tfr->rx_buf & 3) == 0) &&
- (((size_t)tfr->tx_buf & 3) == 0))
- return true;
-
- /* otherwise we only allow transfers within the same page
- * to avoid wasting time on dma_mapping when it is not practical
- */
- if (((size_t)tfr->tx_buf & (PAGE_SIZE - 1)) + tfr->len > PAGE_SIZE) {
- dev_warn_once(&spi->dev,
- "Unaligned spi tx-transfer bridging page\n");
- return false;
- }
- if (((size_t)tfr->rx_buf & (PAGE_SIZE - 1)) + tfr->len > PAGE_SIZE) {
- dev_warn_once(&spi->dev,
- "Unaligned spi rx-transfer bridging page\n");
- return false;
- }
-
/* return OK */
return true;
}
if (cmpxchg(&bs->dma_pending, true, false)) {
dmaengine_terminate_all(master->dma_tx);
dmaengine_terminate_all(master->dma_rx);
+ bcm2835_spi_undo_prologue(bs);
}
/* and reset */
bcm2835_spi_reset_hw(master);