xen/grant-table: add gnttab_try_end_foreign_access()
[platform/kernel/linux-rpi.git] / drivers / spi / spi-dw-dma.c
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Special handling for DW DMA core
4  *
5  * Copyright (c) 2009, 2014 Intel Corporation.
6  */
7
8 #include <linux/completion.h>
9 #include <linux/dma-mapping.h>
10 #include <linux/dmaengine.h>
11 #include <linux/irqreturn.h>
12 #include <linux/jiffies.h>
13 #include <linux/pci.h>
14 #include <linux/platform_data/dma-dw.h>
15 #include <linux/spi/spi.h>
16 #include <linux/types.h>
17
18 #include "spi-dw.h"
19
/* Bit numbers in dws->dma_chan_busy flagging a submitted channel transfer */
#define RX_BUSY		0
#define TX_BUSY		1

/* Burst lengths used when a DMA channel does not report its max_burst */
#define RX_BURST_LEVEL	16
#define TX_BURST_LEVEL	16
24
25 static bool dw_spi_dma_chan_filter(struct dma_chan *chan, void *param)
26 {
27         struct dw_dma_slave *s = param;
28
29         if (s->dma_dev != chan->device->dev)
30                 return false;
31
32         chan->private = s;
33         return true;
34 }
35
36 static void dw_spi_dma_maxburst_init(struct dw_spi *dws)
37 {
38         struct dma_slave_caps caps;
39         u32 max_burst, def_burst;
40         int ret;
41
42         def_burst = dws->fifo_len / 2;
43
44         ret = dma_get_slave_caps(dws->rxchan, &caps);
45         if (!ret && caps.max_burst)
46                 max_burst = caps.max_burst;
47         else
48                 max_burst = RX_BURST_LEVEL;
49
50         dws->rxburst = min(max_burst, def_burst);
51         dw_writel(dws, DW_SPI_DMARDLR, dws->rxburst - 1);
52
53         ret = dma_get_slave_caps(dws->txchan, &caps);
54         if (!ret && caps.max_burst)
55                 max_burst = caps.max_burst;
56         else
57                 max_burst = TX_BURST_LEVEL;
58
59         /*
60          * Having a Rx DMA channel serviced with higher priority than a Tx DMA
61          * channel might not be enough to provide a well balanced DMA-based
62          * SPI transfer interface. There might still be moments when the Tx DMA
63          * channel is occasionally handled faster than the Rx DMA channel.
64          * That in its turn will eventually cause the SPI Rx FIFO overflow if
65          * SPI bus speed is high enough to fill the SPI Rx FIFO in before it's
66          * cleared by the Rx DMA channel. In order to fix the problem the Tx
67          * DMA activity is intentionally slowed down by limiting the SPI Tx
68          * FIFO depth with a value twice bigger than the Tx burst length.
69          */
70         dws->txburst = min(max_burst, def_burst);
71         dw_writel(dws, DW_SPI_DMATDLR, dws->txburst);
72 }
73
74 static void dw_spi_dma_sg_burst_init(struct dw_spi *dws)
75 {
76         struct dma_slave_caps tx = {0}, rx = {0};
77
78         dma_get_slave_caps(dws->txchan, &tx);
79         dma_get_slave_caps(dws->rxchan, &rx);
80
81         if (tx.max_sg_burst > 0 && rx.max_sg_burst > 0)
82                 dws->dma_sg_burst = min(tx.max_sg_burst, rx.max_sg_burst);
83         else if (tx.max_sg_burst > 0)
84                 dws->dma_sg_burst = tx.max_sg_burst;
85         else if (rx.max_sg_burst > 0)
86                 dws->dma_sg_burst = rx.max_sg_burst;
87         else
88                 dws->dma_sg_burst = 0;
89 }
90
91 static int dw_spi_dma_init_mfld(struct device *dev, struct dw_spi *dws)
92 {
93         struct dw_dma_slave dma_tx = { .dst_id = 1 }, *tx = &dma_tx;
94         struct dw_dma_slave dma_rx = { .src_id = 0 }, *rx = &dma_rx;
95         struct pci_dev *dma_dev;
96         dma_cap_mask_t mask;
97
98         /*
99          * Get pci device for DMA controller, currently it could only
100          * be the DMA controller of Medfield
101          */
102         dma_dev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x0827, NULL);
103         if (!dma_dev)
104                 return -ENODEV;
105
106         dma_cap_zero(mask);
107         dma_cap_set(DMA_SLAVE, mask);
108
109         /* 1. Init rx channel */
110         rx->dma_dev = &dma_dev->dev;
111         dws->rxchan = dma_request_channel(mask, dw_spi_dma_chan_filter, rx);
112         if (!dws->rxchan)
113                 goto err_exit;
114
115         /* 2. Init tx channel */
116         tx->dma_dev = &dma_dev->dev;
117         dws->txchan = dma_request_channel(mask, dw_spi_dma_chan_filter, tx);
118         if (!dws->txchan)
119                 goto free_rxchan;
120
121         dws->master->dma_rx = dws->rxchan;
122         dws->master->dma_tx = dws->txchan;
123
124         init_completion(&dws->dma_completion);
125
126         dw_spi_dma_maxburst_init(dws);
127
128         dw_spi_dma_sg_burst_init(dws);
129
130         return 0;
131
132 free_rxchan:
133         dma_release_channel(dws->rxchan);
134         dws->rxchan = NULL;
135 err_exit:
136         return -EBUSY;
137 }
138
139 static int dw_spi_dma_init_generic(struct device *dev, struct dw_spi *dws)
140 {
141         dws->rxchan = dma_request_slave_channel(dev, "rx");
142         if (!dws->rxchan)
143                 return -ENODEV;
144
145         dws->txchan = dma_request_slave_channel(dev, "tx");
146         if (!dws->txchan) {
147                 dma_release_channel(dws->rxchan);
148                 dws->rxchan = NULL;
149                 return -ENODEV;
150         }
151
152         dws->master->dma_rx = dws->rxchan;
153         dws->master->dma_tx = dws->txchan;
154
155         init_completion(&dws->dma_completion);
156
157         dw_spi_dma_maxburst_init(dws);
158
159         dw_spi_dma_sg_burst_init(dws);
160
161         return 0;
162 }
163
164 static void dw_spi_dma_exit(struct dw_spi *dws)
165 {
166         if (dws->txchan) {
167                 dmaengine_terminate_sync(dws->txchan);
168                 dma_release_channel(dws->txchan);
169         }
170
171         if (dws->rxchan) {
172                 dmaengine_terminate_sync(dws->rxchan);
173                 dma_release_channel(dws->rxchan);
174         }
175 }
176
177 static irqreturn_t dw_spi_dma_transfer_handler(struct dw_spi *dws)
178 {
179         dw_spi_check_status(dws, false);
180
181         complete(&dws->dma_completion);
182
183         return IRQ_HANDLED;
184 }
185
186 static bool dw_spi_can_dma(struct spi_controller *master,
187                            struct spi_device *spi, struct spi_transfer *xfer)
188 {
189         struct dw_spi *dws = spi_controller_get_devdata(master);
190
191         return xfer->len > dws->fifo_len;
192 }
193
194 static enum dma_slave_buswidth dw_spi_dma_convert_width(u8 n_bytes)
195 {
196         if (n_bytes == 1)
197                 return DMA_SLAVE_BUSWIDTH_1_BYTE;
198         else if (n_bytes == 2)
199                 return DMA_SLAVE_BUSWIDTH_2_BYTES;
200
201         return DMA_SLAVE_BUSWIDTH_UNDEFINED;
202 }
203
204 static int dw_spi_dma_wait(struct dw_spi *dws, unsigned int len, u32 speed)
205 {
206         unsigned long long ms;
207
208         ms = len * MSEC_PER_SEC * BITS_PER_BYTE;
209         do_div(ms, speed);
210         ms += ms + 200;
211
212         if (ms > UINT_MAX)
213                 ms = UINT_MAX;
214
215         ms = wait_for_completion_timeout(&dws->dma_completion,
216                                          msecs_to_jiffies(ms));
217
218         if (ms == 0) {
219                 dev_err(&dws->master->cur_msg->spi->dev,
220                         "DMA transaction timed out\n");
221                 return -ETIMEDOUT;
222         }
223
224         return 0;
225 }
226
227 static inline bool dw_spi_dma_tx_busy(struct dw_spi *dws)
228 {
229         return !(dw_readl(dws, DW_SPI_SR) & SR_TF_EMPT);
230 }
231
232 static int dw_spi_dma_wait_tx_done(struct dw_spi *dws,
233                                    struct spi_transfer *xfer)
234 {
235         int retry = SPI_WAIT_RETRIES;
236         struct spi_delay delay;
237         u32 nents;
238
239         nents = dw_readl(dws, DW_SPI_TXFLR);
240         delay.unit = SPI_DELAY_UNIT_SCK;
241         delay.value = nents * dws->n_bytes * BITS_PER_BYTE;
242
243         while (dw_spi_dma_tx_busy(dws) && retry--)
244                 spi_delay_exec(&delay, xfer);
245
246         if (retry < 0) {
247                 dev_err(&dws->master->dev, "Tx hanged up\n");
248                 return -EIO;
249         }
250
251         return 0;
252 }
253
254 /*
255  * dws->dma_chan_busy is set before the dma transfer starts, callback for tx
256  * channel will clear a corresponding bit.
257  */
258 static void dw_spi_dma_tx_done(void *arg)
259 {
260         struct dw_spi *dws = arg;
261
262         clear_bit(TX_BUSY, &dws->dma_chan_busy);
263         if (test_bit(RX_BUSY, &dws->dma_chan_busy))
264                 return;
265
266         complete(&dws->dma_completion);
267 }
268
269 static int dw_spi_dma_config_tx(struct dw_spi *dws)
270 {
271         struct dma_slave_config txconf;
272
273         memset(&txconf, 0, sizeof(txconf));
274         txconf.direction = DMA_MEM_TO_DEV;
275         txconf.dst_addr = dws->dma_addr;
276         txconf.dst_maxburst = dws->txburst;
277         txconf.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
278         txconf.dst_addr_width = dw_spi_dma_convert_width(dws->n_bytes);
279         txconf.device_fc = false;
280
281         return dmaengine_slave_config(dws->txchan, &txconf);
282 }
283
284 static int dw_spi_dma_submit_tx(struct dw_spi *dws, struct scatterlist *sgl,
285                                 unsigned int nents)
286 {
287         struct dma_async_tx_descriptor *txdesc;
288         dma_cookie_t cookie;
289         int ret;
290
291         txdesc = dmaengine_prep_slave_sg(dws->txchan, sgl, nents,
292                                          DMA_MEM_TO_DEV,
293                                          DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
294         if (!txdesc)
295                 return -ENOMEM;
296
297         txdesc->callback = dw_spi_dma_tx_done;
298         txdesc->callback_param = dws;
299
300         cookie = dmaengine_submit(txdesc);
301         ret = dma_submit_error(cookie);
302         if (ret) {
303                 dmaengine_terminate_sync(dws->txchan);
304                 return ret;
305         }
306
307         set_bit(TX_BUSY, &dws->dma_chan_busy);
308
309         return 0;
310 }
311
312 static inline bool dw_spi_dma_rx_busy(struct dw_spi *dws)
313 {
314         return !!(dw_readl(dws, DW_SPI_SR) & SR_RF_NOT_EMPT);
315 }
316
317 static int dw_spi_dma_wait_rx_done(struct dw_spi *dws)
318 {
319         int retry = SPI_WAIT_RETRIES;
320         struct spi_delay delay;
321         unsigned long ns, us;
322         u32 nents;
323
324         /*
325          * It's unlikely that DMA engine is still doing the data fetching, but
326          * if it's let's give it some reasonable time. The timeout calculation
327          * is based on the synchronous APB/SSI reference clock rate, on a
328          * number of data entries left in the Rx FIFO, times a number of clock
329          * periods normally needed for a single APB read/write transaction
330          * without PREADY signal utilized (which is true for the DW APB SSI
331          * controller).
332          */
333         nents = dw_readl(dws, DW_SPI_RXFLR);
334         ns = 4U * NSEC_PER_SEC / dws->max_freq * nents;
335         if (ns <= NSEC_PER_USEC) {
336                 delay.unit = SPI_DELAY_UNIT_NSECS;
337                 delay.value = ns;
338         } else {
339                 us = DIV_ROUND_UP(ns, NSEC_PER_USEC);
340                 delay.unit = SPI_DELAY_UNIT_USECS;
341                 delay.value = clamp_val(us, 0, USHRT_MAX);
342         }
343
344         while (dw_spi_dma_rx_busy(dws) && retry--)
345                 spi_delay_exec(&delay, NULL);
346
347         if (retry < 0) {
348                 dev_err(&dws->master->dev, "Rx hanged up\n");
349                 return -EIO;
350         }
351
352         return 0;
353 }
354
355 /*
356  * dws->dma_chan_busy is set before the dma transfer starts, callback for rx
357  * channel will clear a corresponding bit.
358  */
359 static void dw_spi_dma_rx_done(void *arg)
360 {
361         struct dw_spi *dws = arg;
362
363         clear_bit(RX_BUSY, &dws->dma_chan_busy);
364         if (test_bit(TX_BUSY, &dws->dma_chan_busy))
365                 return;
366
367         complete(&dws->dma_completion);
368 }
369
370 static int dw_spi_dma_config_rx(struct dw_spi *dws)
371 {
372         struct dma_slave_config rxconf;
373
374         memset(&rxconf, 0, sizeof(rxconf));
375         rxconf.direction = DMA_DEV_TO_MEM;
376         rxconf.src_addr = dws->dma_addr;
377         rxconf.src_maxburst = dws->rxburst;
378         rxconf.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
379         rxconf.src_addr_width = dw_spi_dma_convert_width(dws->n_bytes);
380         rxconf.device_fc = false;
381
382         return dmaengine_slave_config(dws->rxchan, &rxconf);
383 }
384
385 static int dw_spi_dma_submit_rx(struct dw_spi *dws, struct scatterlist *sgl,
386                                 unsigned int nents)
387 {
388         struct dma_async_tx_descriptor *rxdesc;
389         dma_cookie_t cookie;
390         int ret;
391
392         rxdesc = dmaengine_prep_slave_sg(dws->rxchan, sgl, nents,
393                                          DMA_DEV_TO_MEM,
394                                          DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
395         if (!rxdesc)
396                 return -ENOMEM;
397
398         rxdesc->callback = dw_spi_dma_rx_done;
399         rxdesc->callback_param = dws;
400
401         cookie = dmaengine_submit(rxdesc);
402         ret = dma_submit_error(cookie);
403         if (ret) {
404                 dmaengine_terminate_sync(dws->rxchan);
405                 return ret;
406         }
407
408         set_bit(RX_BUSY, &dws->dma_chan_busy);
409
410         return 0;
411 }
412
413 static int dw_spi_dma_setup(struct dw_spi *dws, struct spi_transfer *xfer)
414 {
415         u16 imr, dma_ctrl;
416         int ret;
417
418         if (!xfer->tx_buf)
419                 return -EINVAL;
420
421         /* Setup DMA channels */
422         ret = dw_spi_dma_config_tx(dws);
423         if (ret)
424                 return ret;
425
426         if (xfer->rx_buf) {
427                 ret = dw_spi_dma_config_rx(dws);
428                 if (ret)
429                         return ret;
430         }
431
432         /* Set the DMA handshaking interface */
433         dma_ctrl = SPI_DMA_TDMAE;
434         if (xfer->rx_buf)
435                 dma_ctrl |= SPI_DMA_RDMAE;
436         dw_writel(dws, DW_SPI_DMACR, dma_ctrl);
437
438         /* Set the interrupt mask */
439         imr = SPI_INT_TXOI;
440         if (xfer->rx_buf)
441                 imr |= SPI_INT_RXUI | SPI_INT_RXOI;
442         spi_umask_intr(dws, imr);
443
444         reinit_completion(&dws->dma_completion);
445
446         dws->transfer_handler = dw_spi_dma_transfer_handler;
447
448         return 0;
449 }
450
451 static int dw_spi_dma_transfer_all(struct dw_spi *dws,
452                                    struct spi_transfer *xfer)
453 {
454         int ret;
455
456         /* Submit the DMA Tx transfer */
457         ret = dw_spi_dma_submit_tx(dws, xfer->tx_sg.sgl, xfer->tx_sg.nents);
458         if (ret)
459                 goto err_clear_dmac;
460
461         /* Submit the DMA Rx transfer if required */
462         if (xfer->rx_buf) {
463                 ret = dw_spi_dma_submit_rx(dws, xfer->rx_sg.sgl,
464                                            xfer->rx_sg.nents);
465                 if (ret)
466                         goto err_clear_dmac;
467
468                 /* rx must be started before tx due to spi instinct */
469                 dma_async_issue_pending(dws->rxchan);
470         }
471
472         dma_async_issue_pending(dws->txchan);
473
474         ret = dw_spi_dma_wait(dws, xfer->len, xfer->effective_speed_hz);
475
476 err_clear_dmac:
477         dw_writel(dws, DW_SPI_DMACR, 0);
478
479         return ret;
480 }
481
482 /*
483  * In case if at least one of the requested DMA channels doesn't support the
484  * hardware accelerated SG list entries traverse, the DMA driver will most
485  * likely work that around by performing the IRQ-based SG list entries
486  * resubmission. That might and will cause a problem if the DMA Tx channel is
487  * recharged and re-executed before the Rx DMA channel. Due to
488  * non-deterministic IRQ-handler execution latency the DMA Tx channel will
489  * start pushing data to the SPI bus before the Rx DMA channel is even
490  * reinitialized with the next inbound SG list entry. By doing so the DMA Tx
491  * channel will implicitly start filling the DW APB SSI Rx FIFO up, which while
492  * the DMA Rx channel being recharged and re-executed will eventually be
493  * overflown.
494  *
495  * In order to solve the problem we have to feed the DMA engine with SG list
496  * entries one-by-one. It shall keep the DW APB SSI Tx and Rx FIFOs
497  * synchronized and prevent the Rx FIFO overflow. Since in general the tx_sg
498  * and rx_sg lists may have different number of entries of different lengths
499  * (though total length should match) let's virtually split the SG-lists to the
500  * set of DMA transfers, which length is a minimum of the ordered SG-entries
501  * lengths. An ASCII-sketch of the implemented algo is following:
502  *                  xfer->len
503  *                |___________|
504  * tx_sg list:    |___|____|__|
505  * rx_sg list:    |_|____|____|
506  * DMA transfers: |_|_|__|_|__|
507  *
508  * Note in order to have this workaround solving the denoted problem the DMA
509  * engine driver should properly initialize the max_sg_burst capability and set
510  * the DMA device max segment size parameter with maximum data block size the
511  * DMA engine supports.
512  */
513
514 static int dw_spi_dma_transfer_one(struct dw_spi *dws,
515                                    struct spi_transfer *xfer)
516 {
517         struct scatterlist *tx_sg = NULL, *rx_sg = NULL, tx_tmp, rx_tmp;
518         unsigned int tx_len = 0, rx_len = 0;
519         unsigned int base, len;
520         int ret;
521
522         sg_init_table(&tx_tmp, 1);
523         sg_init_table(&rx_tmp, 1);
524
525         for (base = 0, len = 0; base < xfer->len; base += len) {
526                 /* Fetch next Tx DMA data chunk */
527                 if (!tx_len) {
528                         tx_sg = !tx_sg ? &xfer->tx_sg.sgl[0] : sg_next(tx_sg);
529                         sg_dma_address(&tx_tmp) = sg_dma_address(tx_sg);
530                         tx_len = sg_dma_len(tx_sg);
531                 }
532
533                 /* Fetch next Rx DMA data chunk */
534                 if (!rx_len) {
535                         rx_sg = !rx_sg ? &xfer->rx_sg.sgl[0] : sg_next(rx_sg);
536                         sg_dma_address(&rx_tmp) = sg_dma_address(rx_sg);
537                         rx_len = sg_dma_len(rx_sg);
538                 }
539
540                 len = min(tx_len, rx_len);
541
542                 sg_dma_len(&tx_tmp) = len;
543                 sg_dma_len(&rx_tmp) = len;
544
545                 /* Submit DMA Tx transfer */
546                 ret = dw_spi_dma_submit_tx(dws, &tx_tmp, 1);
547                 if (ret)
548                         break;
549
550                 /* Submit DMA Rx transfer */
551                 ret = dw_spi_dma_submit_rx(dws, &rx_tmp, 1);
552                 if (ret)
553                         break;
554
555                 /* Rx must be started before Tx due to SPI instinct */
556                 dma_async_issue_pending(dws->rxchan);
557
558                 dma_async_issue_pending(dws->txchan);
559
560                 /*
561                  * Here we only need to wait for the DMA transfer to be
562                  * finished since SPI controller is kept enabled during the
563                  * procedure this loop implements and there is no risk to lose
564                  * data left in the Tx/Rx FIFOs.
565                  */
566                 ret = dw_spi_dma_wait(dws, len, xfer->effective_speed_hz);
567                 if (ret)
568                         break;
569
570                 reinit_completion(&dws->dma_completion);
571
572                 sg_dma_address(&tx_tmp) += len;
573                 sg_dma_address(&rx_tmp) += len;
574                 tx_len -= len;
575                 rx_len -= len;
576         }
577
578         dw_writel(dws, DW_SPI_DMACR, 0);
579
580         return ret;
581 }
582
583 static int dw_spi_dma_transfer(struct dw_spi *dws, struct spi_transfer *xfer)
584 {
585         unsigned int nents;
586         int ret;
587
588         nents = max(xfer->tx_sg.nents, xfer->rx_sg.nents);
589
590         /*
591          * Execute normal DMA-based transfer (which submits the Rx and Tx SG
592          * lists directly to the DMA engine at once) if either full hardware
593          * accelerated SG list traverse is supported by both channels, or the
594          * Tx-only SPI transfer is requested, or the DMA engine is capable to
595          * handle both SG lists on hardware accelerated basis.
596          */
597         if (!dws->dma_sg_burst || !xfer->rx_buf || nents <= dws->dma_sg_burst)
598                 ret = dw_spi_dma_transfer_all(dws, xfer);
599         else
600                 ret = dw_spi_dma_transfer_one(dws, xfer);
601         if (ret)
602                 return ret;
603
604         if (dws->master->cur_msg->status == -EINPROGRESS) {
605                 ret = dw_spi_dma_wait_tx_done(dws, xfer);
606                 if (ret)
607                         return ret;
608         }
609
610         if (xfer->rx_buf && dws->master->cur_msg->status == -EINPROGRESS)
611                 ret = dw_spi_dma_wait_rx_done(dws);
612
613         return ret;
614 }
615
616 static void dw_spi_dma_stop(struct dw_spi *dws)
617 {
618         if (test_bit(TX_BUSY, &dws->dma_chan_busy)) {
619                 dmaengine_terminate_sync(dws->txchan);
620                 clear_bit(TX_BUSY, &dws->dma_chan_busy);
621         }
622         if (test_bit(RX_BUSY, &dws->dma_chan_busy)) {
623                 dmaengine_terminate_sync(dws->rxchan);
624                 clear_bit(RX_BUSY, &dws->dma_chan_busy);
625         }
626 }
627
628 static const struct dw_spi_dma_ops dw_spi_dma_mfld_ops = {
629         .dma_init       = dw_spi_dma_init_mfld,
630         .dma_exit       = dw_spi_dma_exit,
631         .dma_setup      = dw_spi_dma_setup,
632         .can_dma        = dw_spi_can_dma,
633         .dma_transfer   = dw_spi_dma_transfer,
634         .dma_stop       = dw_spi_dma_stop,
635 };
636
637 void dw_spi_dma_setup_mfld(struct dw_spi *dws)
638 {
639         dws->dma_ops = &dw_spi_dma_mfld_ops;
640 }
641 EXPORT_SYMBOL_GPL(dw_spi_dma_setup_mfld);
642
643 static const struct dw_spi_dma_ops dw_spi_dma_generic_ops = {
644         .dma_init       = dw_spi_dma_init_generic,
645         .dma_exit       = dw_spi_dma_exit,
646         .dma_setup      = dw_spi_dma_setup,
647         .can_dma        = dw_spi_can_dma,
648         .dma_transfer   = dw_spi_dma_transfer,
649         .dma_stop       = dw_spi_dma_stop,
650 };
651
652 void dw_spi_dma_setup_generic(struct dw_spi *dws)
653 {
654         dws->dma_ops = &dw_spi_dma_generic_ops;
655 }
656 EXPORT_SYMBOL_GPL(dw_spi_dma_setup_generic);