1 // SPDX-License-Identifier: GPL-2.0-or-later
3 Madge Ambassador ATM Adapter driver.
4 Copyright (C) 1995-1999 Madge Networks Ltd.
8 /* * dedicated to the memory of Graham Gordon 1971-1998 * */
10 #include <linux/module.h>
11 #include <linux/types.h>
12 #include <linux/pci.h>
13 #include <linux/kernel.h>
14 #include <linux/init.h>
15 #include <linux/ioport.h>
16 #include <linux/atmdev.h>
17 #include <linux/delay.h>
18 #include <linux/interrupt.h>
19 #include <linux/poison.h>
20 #include <linux/bitrev.h>
21 #include <linux/mutex.h>
22 #include <linux/firmware.h>
23 #include <linux/ihex.h>
24 #include <linux/slab.h>
26 #include <linux/atomic.h>
28 #include <asm/byteorder.h>
30 #include "ambassador.h"
32 #define maintainer_string "Giuliano Procida at Madge Networks <gprocida@madge.com>"
33 #define description_string "Madge ATM Ambassador driver"
34 #define version_string "1.2.4"
/* Log the driver description and version (description_string and
   version_string, defined just above) with printk.  Marked __init. */
36 static inline void __init show_version (void) {
37 printk ("%s version %s\n", description_string, version_string);
44 I Hardware, detection, initialisation and shutdown.
48 This driver is for the PCI ATMizer-based Ambassador card (except
49 very early versions). It is not suitable for the similar EISA "TR7"
50 card. Commercially, both cards are known as Collage Server ATM
53 The loader supports image transfer to the card, image start and a few
54 other miscellaneous commands.
56 Only AAL5 is supported with vpi = 0 and vci in the range 0 to 1023.
58 The cards are big-endian.
62 Standard PCI stuff, the early cards are detected and rejected.
66 The cards are reset and the self-test results are checked. The
67 microcode image is then transferred and started. This waits for a
68 pointer to a descriptor containing details of the host-based queues
69 and buffers and various parameters etc. Once they are processed
70 normal operations may begin. The BIA is read using a microcode
75 This may be accomplished either by a card reset or via the microcode
76 shutdown command. Further investigation required.
80 The card reset does not affect PCI configuration (good) or the
81 contents of several other "shared run-time registers" (bad) which
82 include doorbell and interrupt control as well as EEPROM and PCI
83 control. The driver must be careful when modifying these registers
84 not to touch bits it does not use and to undo any changes at exit.
90 The adapter is quite intelligent (fast) and has a simple interface
91 (few features). VPI is always zero, 1024 VCIs are supported. There
92 is limited cell rate support. UBR channels can be capped and ABR
93 (explicit rate, but not EFCI) is supported. There is no CBR or VBR
96 1. Driver <-> Adapter Communication
98 Apart from the basic loader commands, the driver communicates
99 through three entities: the command queue (CQ), the transmit queue
100 pair (TXQ) and the receive queue pairs (RXQ). These three entities
101 are set up by the host and passed to the microcode just after it has
104 All queues are host-based circular queues. They are contiguous and
105 (due to hardware limitations) have some restrictions as to their
106 locations in (bus) memory. They are of the "full means the same as
107 empty so don't do that" variety since the adapter uses pointers
110 The queue pairs work as follows: one queue is for supply to the
111 adapter, items in it are pending and are owned by the adapter; the
112 other is the queue for return from the adapter, items in it have
113 been dealt with by the adapter. The host adds items to the supply
114 (TX descriptors and free RX buffer descriptors) and removes items
115 from the return (TX and RX completions). The adapter deals with out
116 of order completions.
118 Interrupts (card to host) and the doorbell (host to card) are used
123 This is to communicate "open VC", "close VC", "get stats" etc. to
124 the adapter. At most one command is retired every millisecond by the
125 card. There is no out of order completion or notification. The
126 driver needs to check the return code of the command, waiting as
131 TX supply items are of variable length (scatter gather support) and
132 so the queue items are (more or less) pointers to the real thing.
133 Each TX supply item contains a unique, host-supplied handle (the skb
134 bus address seems most sensible as this works for Alphas as well,
135 there is no need to do any endian conversions on the handles).
137 TX return items consist of just the handles above.
139 3. RXQ (up to 4 of these with different lengths and buffer sizes)
141 RX supply items consist of a unique, host-supplied handle (the skb
142 bus address again) and a pointer to the buffer data area.
144 RX return items consist of the handle above, the VC, length and a
145 status word. This just screams "oh so easy" doesn't it?
147 Note on RX pool sizes:
149 Each pool should have enough buffers to handle a back-to-back stream
150 of minimum sized frames on a single VC. For example:
152 frame spacing = 3us (about right)
154 delay = IRQ lat + RX handling + RX buffer replenish = 20 (us) (a guess)
156 min number of buffers for one VC = 1 + delay/spacing (buffers)
158 delay/spacing = latency = (20+2)/3 = 7 (buffers) (rounding up)
160 The 20us delay assumes that there is no need to sleep; if we need to
161 sleep to get buffers we are going to drop frames anyway.
163 In fact, each pool should have enough buffers to support the
164 simultaneous reassembly of a separate frame on each VC and cope with
165 the case in which frames complete in round robin cell fashion on
168 Only one frame can complete at each cell arrival, so if "n" VCs are
169 open, the worst case is to have them all complete frames together
170 followed by all starting new frames together.
172 desired number of buffers = n + delay/spacing
174 These are the extreme requirements, however, they are "n+k" for some
175 "k" so we have only the constant to choose. This is the argument
176 rx_lats which currently defaults to 7.
178 Actually, "n ? n+k : 0" is better and this is what is implemented,
179 subject to the limit given by the pool size.
183 Simple spinlocks are used around the TX and RX queue mechanisms.
184 Anyone with a faster, working method is welcome to implement it.
186 The adapter command queue is protected with a spinlock. We always
187 wait for commands to complete.
189 A more complex form of locking is used around parts of the VC open
190 and close functions. There are three reasons for a lock: 1. we need
191 to do atomic rate reservation and release (not used yet), 2. Opening
192 sometimes involves two adapter commands which must not be separated
193 by another command on the same VC, 3. the changes to RX pool size
194 must be atomic. The lock needs to work over context switches, so we
197 III Hardware Features and Microcode Bugs
201 *%^"$&%^$*&^"$(%^$#&^%$(&#%$*(&^#%!"!"!*!
205 All structures that are not accessed using DMA must be 4-byte
206 aligned (not a problem) and must not cross 4MB boundaries.
208 There is a DMA memory hole at E0000000-E00000FF (groan).
210 TX fragments (DMA read) must not cross 4MB boundaries (would be 16MB
211 but for a hardware bug).
213 RX buffers (DMA write) must not cross 16MB boundaries and must
214 include spare trailing bytes up to the next 4-byte boundary; they
215 will be written with rubbish.
217 The PLX likes to prefetch; if reading up to 4 u32 past the end of
218 each TX fragment is not a problem, then TX can be made to go a
219 little faster by passing a flag at init that disables a prefetch
220 workaround. We do not pass this flag. (new microcode only)
223 . Note that alloc_skb rounds up size to a 16byte boundary.
224 . Ensure all areas do not traverse 4MB boundaries.
225 . Ensure all areas do not start at a E00000xx bus address.
226 (I cannot be certain, but this may always hold with Linux)
227 . Make all failures cause a loud message.
228 . Discard non-conforming SKBs (causes TX failure or RX fill delay).
229 . Discard non-conforming TX fragment descriptors (the TX fails).
230 In the future we could:
231 . Allow RX areas that traverse 4MB (but not 16MB) boundaries.
232 . Segment TX areas into some/more fragments, when necessary.
233 . Relax checks for non-DMA items (ignore hole).
234 . Give scatter-gather (iovec) requirements using ???. (?)
236 3. VC close is broken (only for new microcode)
238 The VC close adapter microcode command fails to do anything if any
239 frames have been received on the VC but none have been transmitted.
240 Frames continue to be reassembled and passed (with IRQ) to the
247 . Timer code may be broken.
249 . Deal with buggy VC close (somehow) in microcode 12.
251 . Handle interrupted and/or non-blocking writes - is this a job for
254 . Add code to break up TX fragments when they span 4MB boundaries.
256 . Add SUNI phy layer (need to know where SUNI lives on card).
258 . Implement a tx_alloc fn to (a) satisfy TX alignment etc. and (b)
259 leave extra headroom space for Ambassador TX descriptors.
261 . Understand these elements of struct atm_vcc: recvq (proto?),
262 sleep, callback, listenq, backlog_quota, reply and user_back.
264 . Adjust TX/RX skb allocation to favour IP with LANE/CLIP (configurable).
266 . Impose a TX-pending limit (2?) on each VC, help avoid TX q overflow.
268 . Decide whether RX buffer recycling is or can be made completely safe;
269 turn it back on. It looks like Werner is going to axe this.
271 . Implement QoS changes on open VCs (involves extracting parts of VC open
272 and close into separate functions and using them to make changes).
274 . Hack on command queue so that someone can issue multiple commands and wait
275 on the last one (OR only "no-op" or "wait" commands are waited for).
277 . Eliminate need for while-schedule around do_command.
// forward declaration; takes a struct timer_list *, so presumably a timer
// callback - its definition is not in this part of the file
281 static void do_housekeeping (struct timer_list *t);
282 /********** globals **********/
// bitmask of DBG_* flags; dump_registers tests it as "debug & DBG_REGS"
284 static unsigned short debug = 0;
// presumably the command queue and TX queue sizes - TODO confirm against
// the queue set-up code
285 static unsigned int cmds = 8;
286 static unsigned int txs = 32;
// one entry per RX pool; presumably queue sizes (rxs) and buffer sizes in
// bytes (rxs_bs) - TODO confirm
287 static unsigned int rxs[NUM_RX_POOLS] = { 64, 64, 64, 64 };
288 static unsigned int rxs_bs[NUM_RX_POOLS] = { 4080, 12240, 36720, 65535 };
// the constant "k" from the "Note on RX pool sizes" above; amb_open uses it
// as the starting value of a pool's buffers_wanted
289 static unsigned int rx_lats = 7;
// presumably a PCI latency timer override - TODO confirm
290 static unsigned char pci_lat = 0;
// NOTE(review): left-shifting a negative int is undefined in ISO C;
// "~0UL << 30" would express the same mask without relying on that
292 static const unsigned long onegigmask = -1 << 30;
294 /********** access to adapter **********/
/* Write the 32-bit value "data" to the adapter at byte offset "addr" with
   no byte-order conversion.  Both a store through dev->membase and a port
   write through outl are visible below; presumably a preprocessor
   conditional that is not visible in this excerpt selects one of them -
   TODO confirm. */
296 static inline void wr_plain (const amb_dev * dev, size_t addr, u32 data) {
297 PRINTD (DBG_FLOW|DBG_REGS, "wr: %08zx <- %08x", addr, data);
299 dev->membase[addr / sizeof(u32)] = data;
301 outl (data, dev->iobase + addr);
/* Read a 32-bit value from the adapter at byte offset "addr" with no
   byte-order conversion.  Both a load through dev->membase and a port read
   through inl are visible below; presumably a preprocessor conditional
   that is not visible in this excerpt selects one of them - TODO confirm.
   The return statement is not visible here either. */
305 static inline u32 rd_plain (const amb_dev * dev, size_t addr) {
307 u32 data = dev->membase[addr / sizeof(u32)];
309 u32 data = inl (dev->iobase + addr);
311 PRINTD (DBG_FLOW|DBG_REGS, "rd: %08zx -> %08x", addr, data);
/* Like wr_plain, but "data" is converted to big-endian with cpu_to_be32
   before it is written (the header comment notes that the cards are
   big-endian).  As in wr_plain, both a membase store and an outl write are
   visible; presumably only one is compiled in - TODO confirm. */
315 static inline void wr_mem (const amb_dev * dev, size_t addr, u32 data) {
316 __be32 be = cpu_to_be32 (data);
317 PRINTD (DBG_FLOW|DBG_REGS, "wr: %08zx <- %08x b[%08x]", addr, data, be);
319 dev->membase[addr / sizeof(u32)] = be;
321 outl (be, dev->iobase + addr);
/* Like rd_plain, but the raw value is treated as big-endian and converted
   to host order with be32_to_cpu.  As in rd_plain, both a membase load and
   an inl read are visible; presumably only one is compiled in - TODO
   confirm.  The return statement is not visible in this excerpt. */
325 static inline u32 rd_mem (const amb_dev * dev, size_t addr) {
327 __be32 be = dev->membase[addr / sizeof(u32)];
329 __be32 be = inl (dev->iobase + addr);
331 u32 data = be32_to_cpu (be);
332 PRINTD (DBG_FLOW|DBG_REGS, "rd: %08zx -> %08x b[%08x]", addr, data, be);
336 /********** dump routines **********/
/* Debug helper: when DEBUG_AMBASSADOR is defined and the DBG_REGS bit is
   set in "debug", walk three ranges of adapter byte offsets in u32 steps
   (0x00-0x2f, 0x40-0x5f, 0x60-0x6f), labelled by the messages below.  The
   loop bodies are not visible in this excerpt; presumably each one reads
   the register so that the access routines log it - TODO confirm. */
338 static inline void dump_registers (const amb_dev * dev) {
339 #ifdef DEBUG_AMBASSADOR
340 if (debug & DBG_REGS) {
342 PRINTD (DBG_REGS, "reading PLX control: ");
343 for (i = 0x00; i < 0x30; i += sizeof(u32))
345 PRINTD (DBG_REGS, "reading mailboxes: ");
346 for (i = 0x40; i < 0x60; i += sizeof(u32))
348 PRINTD (DBG_REGS, "reading doorb irqev irqen reset:");
349 for (i = 0x60; i < 0x70; i += sizeof(u32))
/* Debug helper (DEBUG_AMBASSADOR only): print a loader block's address,
   result, command, each of its MAX_COMMAND_DATA payload words and its
   "valid" word.  Every field is converted from big-endian before it is
   printed. */
358 static inline void dump_loader_block (volatile loader_block * lb) {
359 #ifdef DEBUG_AMBASSADOR
361 PRINTDB (DBG_LOAD, "lb @ %p; res: %d, cmd: %d, pay:",
362 lb, be32_to_cpu (lb->result), be32_to_cpu (lb->command));
363 for (i = 0; i < MAX_COMMAND_DATA; ++i)
364 PRINTDM (DBG_LOAD, " %08x", be32_to_cpu (lb->payload.data[i]));
365 PRINTDE (DBG_LOAD, ", vld: %08x", be32_to_cpu (lb->valid));
/* Debug helper (DEBUG_AMBASSADOR only): print a command's address, its
   request word and its first three parameter words.  The values are
   printed as stored: the be32_to_cpu conversions are commented out. */
372 static inline void dump_command (command * cmd) {
373 #ifdef DEBUG_AMBASSADOR
375 PRINTDB (DBG_CMD, "cmd @ %p, req: %08x, pars:",
376 cmd, /*be32_to_cpu*/ (cmd->request));
377 for (i = 0; i < 3; ++i)
378 PRINTDM (DBG_CMD, " %08x", /*be32_to_cpu*/ (cmd->args.par[i]));
379 PRINTDE (DBG_CMD, "");
/* Debug helper (DEBUG_AMBASSADOR only): print "prefix(vc) " followed by a
   hex dump of the skb's data, limited to the first 256 bytes. */
386 static inline void dump_skb (char * prefix, unsigned int vc, struct sk_buff * skb) {
387 #ifdef DEBUG_AMBASSADOR
389 unsigned char * data = skb->data;
390 PRINTDB (DBG_DATA, "%s(%u) ", prefix, vc);
391 for (i=0; i<skb->len && i < 256;i++)
392 PRINTDM (DBG_DATA, "%02x ", data[i]);
393 PRINTDE (DBG_DATA,"");
402 /********** check memory areas for use by Ambassador **********/
404 /* see limitations under Hardware Features */
/* Check that the memory area [start, start+length-1], taken as bus
   addresses, is usable by the adapter.  The area is rejected, with an
   error message, when its first and last bus address differ in any bit
   from bit 22 upwards (it crosses a 4MB boundary) or when its first
   address lies in the 256-byte block starting at 0xE0000000 (the DMA hole
   described in the header).  The return statements are not visible in
   this excerpt; fill_rx_pool treats a non-zero result as failure.
   NOTE(review): "-1 << 22" and "-1 << 8" left-shift a negative int, which
   ISO C leaves undefined, and the bus address is held in a u32, so any
   higher address bits would be lost. */
406 static int check_area (void * start, size_t length) {
407 // assumes length > 0
408 const u32 fourmegmask = -1 << 22;
409 const u32 twofivesixmask = -1 << 8;
410 const u32 starthole = 0xE0000000;
411 u32 startaddress = virt_to_bus (start);
412 u32 lastaddress = startaddress+length-1;
413 if ((startaddress ^ lastaddress) & fourmegmask ||
414 (startaddress & twofivesixmask) == starthole) {
415 PRINTK (KERN_ERR, "check_area failure: [%x,%x] - mail maintainer!",
416 startaddress, lastaddress);
423 /********** free an skb (as per ATM device driver documentation) **********/
/* Release an skb that belongs to a VC: if the owning vcc (taken from
   ATM_SKB(skb)->vcc) has a pop callback, hand the skb to it.  The
   dev_kfree_skb_any call is presumably the else branch (the "else" line is
   not visible in this excerpt) - TODO confirm. */
425 static void amb_kfree_skb (struct sk_buff * skb) {
426 if (ATM_SKB(skb)->vcc->pop) {
427 ATM_SKB(skb)->vcc->pop (ATM_SKB(skb)->vcc, skb);
429 dev_kfree_skb_any (skb);
433 /********** TX completion **********/
/* Handle one TX return-queue entry: turn tx->handle back into the
   tx_simple descriptor with bus_to_virt, fetch the skb stored in it and
   increment the owning vcc's tx counter.  The lines that free the
   descriptor and the skb are not visible in this excerpt. */
435 static void tx_complete (amb_dev * dev, tx_out * tx) {
436 tx_simple * tx_descr = bus_to_virt (tx->handle);
437 struct sk_buff * skb = tx_descr->skb;
439 PRINTD (DBG_FLOW|DBG_TX, "tx_complete %p %p", dev, tx);
442 atomic_inc(&ATM_SKB(skb)->vcc->stats->tx);
444 // free the descriptor
454 /********** RX completion **********/
/* Handle one RX return-queue entry.  The skb is recovered from rx->handle
   with bus_to_virt; vc, status and length are converted from big-endian.
   Visible below: the receiving vcc is looked up in dev->rxer[vc]; a frame
   no longer than the vcc's rxtp.max_sdu that passes atm_charge is
   stamped, counted and passed to atm_vcc->push; over-size frames bump the
   vcc's rx_drop counter; the CRC/length/abort/unused status bits are
   counted in dev->stats.rx.  Several lines of control flow (conditions,
   else branches, returns) are missing from this excerpt, so the exact
   branch structure must be checked against the complete file; the final
   dev_kfree_skb_any presumably frees skbs that were not pushed. */
456 static void rx_complete (amb_dev * dev, rx_out * rx) {
457 struct sk_buff * skb = bus_to_virt (rx->handle);
458 u16 vc = be16_to_cpu (rx->vc);
459 // unused: u16 lec_id = be16_to_cpu (rx->lec_id);
460 u16 status = be16_to_cpu (rx->status);
461 u16 rx_len = be16_to_cpu (rx->length);
463 PRINTD (DBG_FLOW|DBG_RX, "rx_complete %p %p (len=%hu)", dev, rx, rx_len);
465 // XXX move this in and add to VC stats ???
467 struct atm_vcc * atm_vcc = dev->rxer[vc];
472 if (rx_len <= atm_vcc->qos.rxtp.max_sdu) {
474 if (atm_charge (atm_vcc, skb->truesize)) {
476 // prepare socket buffer
477 ATM_SKB(skb)->vcc = atm_vcc;
478 skb_put (skb, rx_len);
480 dump_skb ("<<<", vc, skb);
483 atomic_inc(&atm_vcc->stats->rx);
484 __net_timestamp(skb);
485 // end of our responsibility
486 atm_vcc->push (atm_vcc, skb);
490 // someone fix this (message), please!
491 PRINTD (DBG_INFO|DBG_RX, "dropped thanks to atm_charge (vc %hu, truesize %u)", vc, skb->truesize);
492 // drop stats incremented in atm_charge
496 PRINTK (KERN_INFO, "dropped over-size frame");
497 // should we count this?
498 atomic_inc(&atm_vcc->stats->rx_drop);
502 PRINTD (DBG_WARN|DBG_RX, "got frame but RX closed for channel %hu", vc);
503 // this is an adapter bug, only in new version of microcode
507 dev->stats.rx.error++;
508 if (status & CRC_ERR)
509 dev->stats.rx.badcrc++;
510 if (status & LEN_ERR)
511 dev->stats.rx.toolong++;
512 if (status & ABORT_ERR)
513 dev->stats.rx.aborted++;
514 if (status & UNUSED_ERR)
515 dev->stats.rx.unused++;
518 dev_kfree_skb_any (skb);
524 Note on queue handling.
526 Here "give" and "take" refer to queue entries and a queue (pair)
527 rather than frames to or from the host or adapter. Empty frame
528 buffers are given to the RX queue pair and returned unused or
529 containing RX frames. TX frames (well, pointers to TX fragment
530 lists) are given to the TX queue pair, completions are returned.
534 /********** command queue **********/
536 // I really don't like this, but it's the best I can do at the moment
538 // also, the callers are responsible for byte order as the microcode
539 // sometimes does 16-bit accesses (yuk yuk yuk)
/* Issue one adapter command and wait for it to complete.
   Visible steps: check the "dead" flag; take cq->lock; if fewer than
   cq->maximum commands are pending, remember the slot, advance ptrs->in
   and write its bus address to the adapter's cmd_address mailbox; update
   the high-water mark and drop the lock.  The caller then waits until
   ptrs->out reaches its slot and that slot's request word reads
   SRB_COMPLETE, and finally advances ptrs->out under the lock.
   The lines that copy the command into the slot, perform the actual
   sleep, copy the result back and return are not visible in this excerpt.
   Callers in this file retry while the result is non-zero. */
541 static int command_do (amb_dev * dev, command * cmd) {
542 amb_cq * cq = &dev->cq;
543 volatile amb_cq_ptrs * ptrs = &cq->ptrs;
546 PRINTD (DBG_FLOW|DBG_CMD, "command_do %p", dev);
548 if (test_bit (dead, &dev->flags))
551 spin_lock (&cq->lock);
554 if (cq->pending < cq->maximum) {
555 // remember my slot for later
557 PRINTD (DBG_CMD, "command in slot %p", my_slot);
564 ptrs->in = NEXTQ (ptrs->in, ptrs->start, ptrs->limit);
567 wr_mem (dev, offsetof(amb_mem, mb.adapter.cmd_address), virt_to_bus (ptrs->in));
569 if (cq->pending > cq->high)
570 cq->high = cq->pending;
571 spin_unlock (&cq->lock);
573 // these comments were in a while-loop before, msleep removes the loop
575 // PRINTD (DBG_CMD, "wait: sleeping %lu for command", timeout);
578 // wait for my slot to be reached (all waiters are here or above, until...)
579 while (ptrs->out != my_slot) {
580 PRINTD (DBG_CMD, "wait: command slot (now at %p)", ptrs->out);
581 set_current_state(TASK_UNINTERRUPTIBLE);
585 // wait on my slot (... one gets to its slot, and... )
586 while (ptrs->out->request != cpu_to_be32 (SRB_COMPLETE)) {
587 PRINTD (DBG_CMD, "wait: command slot completion");
588 set_current_state(TASK_UNINTERRUPTIBLE);
592 PRINTD (DBG_CMD, "command complete");
593 // update queue (... moves the queue along to the next slot)
594 spin_lock (&cq->lock);
598 ptrs->out = NEXTQ (ptrs->out, ptrs->start, ptrs->limit);
599 spin_unlock (&cq->lock);
604 spin_unlock (&cq->lock);
610 /********** TX queue pair **********/
/* Add one entry to the TX supply queue.  Under txq->lock (interrupts
   saved and disabled): if fewer than txq->maximum entries are pending,
   advance the "in" pointer, write its bus address to the adapter's
   tx_address mailbox, ring the doorbell with TX_FRAME and update the
   high-water mark.  The lines that copy *tx into the slot, adjust
   txq->pending and return are not visible in this excerpt; the second
   unlock presumably belongs to the queue-full path - TODO confirm. */
612 static int tx_give (amb_dev * dev, tx_in * tx) {
613 amb_txq * txq = &dev->txq;
616 PRINTD (DBG_FLOW|DBG_TX, "tx_give %p", dev);
618 if (test_bit (dead, &dev->flags))
621 spin_lock_irqsave (&txq->lock, flags);
623 if (txq->pending < txq->maximum) {
624 PRINTD (DBG_TX, "TX in slot %p", txq->in.ptr);
628 txq->in.ptr = NEXTQ (txq->in.ptr, txq->in.start, txq->in.limit);
629 // hand over the TX and ring the bell
630 wr_mem (dev, offsetof(amb_mem, mb.adapter.tx_address), virt_to_bus (txq->in.ptr));
631 wr_mem (dev, offsetof(amb_mem, doorbell), TX_FRAME);
633 if (txq->pending > txq->high)
634 txq->high = txq->pending;
635 spin_unlock_irqrestore (&txq->lock, flags);
639 spin_unlock_irqrestore (&txq->lock, flags);
/* Remove one entry from the TX return queue, if one is ready.  Under
   txq->lock: an entry is ready when something is pending and the slot at
   the "out" pointer has a non-zero handle; it is passed to tx_complete,
   its handle is cleared so the slot reads as empty again, and the "out"
   pointer advances.  The return statements are not visible in this
   excerpt; interrupt_handler keeps calling while the result is zero. */
644 static int tx_take (amb_dev * dev) {
645 amb_txq * txq = &dev->txq;
648 PRINTD (DBG_FLOW|DBG_TX, "tx_take %p", dev);
650 spin_lock_irqsave (&txq->lock, flags);
652 if (txq->pending && txq->out.ptr->handle) {
653 // deal with TX completion
654 tx_complete (dev, txq->out.ptr);
656 txq->out.ptr->handle = 0;
659 txq->out.ptr = NEXTQ (txq->out.ptr, txq->out.start, txq->out.limit);
661 spin_unlock_irqrestore (&txq->lock, flags);
665 spin_unlock_irqrestore (&txq->lock, flags);
670 /********** RX queue pairs **********/
/* Add one free-buffer entry to the supply queue of RX pool "pool".  Under
   rxq->lock: if fewer than rxq->maximum entries are pending, advance the
   "in" pointer and write its bus address to the adapter's rx_address
   mailbox for that pool.  The lines that copy *rx into the slot, adjust
   rxq->pending and return are not visible in this excerpt; fill_rx_pool
   frees the skb when the result is non-zero. */
672 static int rx_give (amb_dev * dev, rx_in * rx, unsigned char pool) {
673 amb_rxq * rxq = &dev->rxq[pool];
676 PRINTD (DBG_FLOW|DBG_RX, "rx_give %p[%hu]", dev, pool);
678 spin_lock_irqsave (&rxq->lock, flags);
680 if (rxq->pending < rxq->maximum) {
681 PRINTD (DBG_RX, "RX in slot %p", rxq->in.ptr);
685 rxq->in.ptr = NEXTQ (rxq->in.ptr, rxq->in.start, rxq->in.limit);
686 // hand over the RX buffer
687 wr_mem (dev, offsetof(amb_mem, mb.adapter.rx_address[pool]), virt_to_bus (rxq->in.ptr));
689 spin_unlock_irqrestore (&rxq->lock, flags);
692 spin_unlock_irqrestore (&rxq->lock, flags);
/* Remove one entry from the return queue of RX pool "pool", if one is
   ready.  Under rxq->lock: an entry is ready when something is pending
   and the slot at the "out" pointer has a non-zero status or length; it
   is passed to rx_complete, its status and length are cleared so the slot
   reads as empty again, the "out" pointer advances and the low-water mark
   is updated.  The return statements and the body of the
   "!pending && buffers_wanted" check are not visible in this excerpt;
   interrupt_handler keeps calling while the result is zero. */
697 static int rx_take (amb_dev * dev, unsigned char pool) {
698 amb_rxq * rxq = &dev->rxq[pool];
701 PRINTD (DBG_FLOW|DBG_RX, "rx_take %p[%hu]", dev, pool);
703 spin_lock_irqsave (&rxq->lock, flags);
705 if (rxq->pending && (rxq->out.ptr->status || rxq->out.ptr->length)) {
706 // deal with RX completion
707 rx_complete (dev, rxq->out.ptr);
709 rxq->out.ptr->status = 0;
710 rxq->out.ptr->length = 0;
713 rxq->out.ptr = NEXTQ (rxq->out.ptr, rxq->out.start, rxq->out.limit);
715 if (rxq->pending < rxq->low)
716 rxq->low = rxq->pending;
717 spin_unlock_irqrestore (&rxq->lock, flags);
720 if (!rxq->pending && rxq->buffers_wanted)
722 spin_unlock_irqrestore (&rxq->lock, flags);
727 /********** RX Pool handling **********/
729 /* pre: buffers_wanted = 0, post: pending = 0 */
/* Take back the buffers of RX pool "pool" from the adapter.  When more
   buffers are pending than wanted, an SRB_FLUSH_BUFFER_Q command for the
   pool is issued (retried while command_do reports failure) and rx_take
   is then called until pending has dropped to buffers_wanted.  The body
   of the dead-flag check and the bodies of the loops are not visible in
   this excerpt. */
730 static void drain_rx_pool (amb_dev * dev, unsigned char pool) {
731 amb_rxq * rxq = &dev->rxq[pool];
733 PRINTD (DBG_FLOW|DBG_POOL, "drain_rx_pool %p %hu", dev, pool);
735 if (test_bit (dead, &dev->flags))
738 /* we are not quite like the fill pool routines as we cannot just
739 remove one buffer, we have to remove all of them, but we might as
741 if (rxq->pending > rxq->buffers_wanted) {
743 cmd.request = cpu_to_be32 (SRB_FLUSH_BUFFER_Q);
744 cmd.args.flush.flags = cpu_to_be32 (pool << SRB_POOL_SHIFT);
745 while (command_do (dev, &cmd))
747 /* the pool may also be emptied via the interrupt handler */
748 while (rxq->pending > rxq->buffers_wanted)
749 if (rx_take (dev, pool))
/* Call drain_rx_pool for every RX pool, 0 to NUM_RX_POOLS-1. */
756 static void drain_rx_pools (amb_dev * dev) {
759 PRINTD (DBG_FLOW|DBG_POOL, "drain_rx_pools %p", dev);
761 for (pool = 0; pool < NUM_RX_POOLS; ++pool)
762 drain_rx_pool (dev, pool);
/* Top up RX pool "pool" with freshly allocated skbs until its pending
   count reaches the smaller of rxq->maximum and rxq->buffers_wanted.
   Each skb is allocated with alloc_skb (buffer_size, priority), checked
   with check_area, and handed to the adapter through rx_give; the handle
   is the bus address of the skb itself and host_address is the big-endian
   bus address of its data area.  An skb that fails check_area or that
   rx_give refuses is freed.  The rest of the parameter list (which
   supplies "priority") and the lines that leave the loop on failure are
   not visible in this excerpt. */
765 static void fill_rx_pool (amb_dev * dev, unsigned char pool,
771 PRINTD (DBG_FLOW|DBG_POOL, "fill_rx_pool %p %hu %x", dev, pool, priority);
773 if (test_bit (dead, &dev->flags))
776 rxq = &dev->rxq[pool];
777 while (rxq->pending < rxq->maximum && rxq->pending < rxq->buffers_wanted) {
779 struct sk_buff * skb = alloc_skb (rxq->buffer_size, priority);
781 PRINTD (DBG_SKB|DBG_POOL, "failed to allocate skb for RX pool %hu", pool);
784 if (check_area (skb->data, skb->truesize)) {
785 dev_kfree_skb_any (skb);
788 // cast needed as there is no %? for pointer differences
789 PRINTD (DBG_SKB, "allocated skb at %p, head %p, area %li",
790 skb, skb->head, (long) skb_end_offset(skb));
791 rx.handle = virt_to_bus (skb);
792 rx.host_address = cpu_to_be32 (virt_to_bus (skb->data));
793 if (rx_give (dev, &rx, pool))
794 dev_kfree_skb_any (skb);
801 // top up all RX pools
/* Call fill_rx_pool for every RX pool, 0 to NUM_RX_POOLS-1, allocating
   with GFP_ATOMIC. */
802 static void fill_rx_pools (amb_dev * dev) {
805 PRINTD (DBG_FLOW|DBG_POOL, "fill_rx_pools %p", dev);
807 for (pool = 0; pool < NUM_RX_POOLS; ++pool)
808 fill_rx_pool (dev, pool, GFP_ATOMIC);
813 /********** enable host interrupts **********/
/* Read-modify-write of the interrupt_control register: set the
   AMB_INTERRUPT_BITS and leave every other bit as it was (the header
   asks the driver not to touch bits it does not use). */
815 static void interrupts_on (amb_dev * dev) {
816 wr_plain (dev, offsetof(amb_mem, interrupt_control),
817 rd_plain (dev, offsetof(amb_mem, interrupt_control))
818 | AMB_INTERRUPT_BITS);
821 /********** disable host interrupts **********/
/* Read-modify-write of the interrupt_control register: clear the
   AMB_INTERRUPT_BITS and leave every other bit as it was. */
823 static void interrupts_off (amb_dev * dev) {
824 wr_plain (dev, offsetof(amb_mem, interrupt_control),
825 rd_plain (dev, offsetof(amb_mem, interrupt_control))
826 &~ AMB_INTERRUPT_BITS);
829 /********** interrupt handling **********/
/* Interrupt handler; dev_id is the amb_dev.  Visible steps: read the
   adapter's "interrupt" register; log "irq not for me" on the path for a
   shared interrupt that is not ours; otherwise write -1 to the register
   (presumably to acknowledge it - TODO confirm), then take entries from
   every RX pool and from the TX queue for as long as rx_take / tx_take
   return zero.  The condition lines, the loop bodies (presumably counting
   into irq_work) and the return statements are not visible in this
   excerpt. */
831 static irqreturn_t interrupt_handler(int irq, void *dev_id) {
832 amb_dev * dev = dev_id;
834 PRINTD (DBG_IRQ|DBG_FLOW, "interrupt_handler: %p", dev_id);
837 u32 interrupt = rd_plain (dev, offsetof(amb_mem, interrupt));
839 // for us or someone else sharing the same interrupt
841 PRINTD (DBG_IRQ, "irq not for me: %d", irq);
846 PRINTD (DBG_IRQ, "FYI: interrupt was %08x", interrupt);
847 wr_plain (dev, offsetof(amb_mem, interrupt), -1);
851 unsigned int irq_work = 0;
853 for (pool = 0; pool < NUM_RX_POOLS; ++pool)
854 while (!rx_take (dev, pool))
856 while (!tx_take (dev))
862 PRINTD (DBG_IRQ, "work done: %u", irq_work);
864 PRINTD (DBG_IRQ|DBG_WARN, "no work done");
868 PRINTD (DBG_IRQ|DBG_FLOW, "interrupt_handler done: %p", dev_id);
872 /********** make rate (not quite as much fun as Horizon) **********/
/* Convert a cell rate (cells per second) into the adapter's 16-bit rate
   encoding, described in the comments below: a 5-bit exponent in bits
   9-13 and a 9-bit mantissa in bits 0-8; bit 14 is deliberately left
   clear because of a microcode bug.
   rate   - requested rate
   r      - rounding direction; round_down and round_nearest are visible
            in this excerpt
   bits   - receives the encoded rate
   actual - amb_open passes NULL here; the expression at the end
            presumably computes the rate actually encoded for callers that
            pass a pointer - TODO confirm
   Many lines (the normalisation loop body, the switch header, the
   returns) are missing from this excerpt.
   NOTE(review): "1<<31" shifts into the sign bit of a signed int, which
   ISO C leaves undefined; "1U<<31" expresses the intended unsigned bit
   test. */
874 static int make_rate (unsigned int rate, rounding r,
875 u16 * bits, unsigned int * actual) {
876 unsigned char exp = -1; // hush gcc
877 unsigned int man = -1; // hush gcc
879 PRINTD (DBG_FLOW|DBG_QOS, "make_rate %u", rate);
881 // rates in cells per second, ITU format (nasty 16-bit floating-point)
882 // given 5-bit e and 9-bit m:
883 // rate = EITHER (1+m/2^9)*2^e OR 0
884 // bits = EITHER 1<<14 | e<<9 | m OR 0
885 // (bit 15 is "reserved", bit 14 "non-zero")
886 // smallest rate is 0 (special representation)
887 // largest rate is (1+511/512)*2^31 = 4290772992 (< 2^32-1)
888 // smallest non-zero rate is (1+0/512)*2^0 = 1 (> 0)
890 // find position of top bit, this gives e
891 // remove top bit and shift (rounding if feeling clever) by 9-e
893 // ucode bug: please don't set bit 14! so 0 rate not representable
895 if (rate > 0xffc00000U) {
896 // larger than largest representable rate
906 // representable rate
911 // invariant: rate = man*2^(exp-31)
912 while (!(man & (1<<31))) {
917 // man has top bit set
918 // rate = (2^31+(man-2^31))*2^(exp-31)
919 // rate = (1+(man-2^31)/2^31)*2^exp
921 man &= 0xffffffffU; // a nop on 32-bit systems
922 // rate = (1+man/2^32)*2^exp
924 // exp is in the range 0 to 31, man is in the range 0 to 2^32-1
925 // time to lose significance... we want m in the range 0 to 2^9-1
926 // rounding presents a minor problem... we first decide which way
927 // we are rounding (based on given rounding direction and possibly
928 // the bits of the mantissa that are to be discarded).
937 // check all bits that we are discarding
938 if (man & (~0U>>9)) {
939 man = (man>>(32-9)) + 1;
941 // no need to check for round up outside of range
950 case round_nearest: {
951 // check msb that we are discarding
952 if (man & (1<<(32-9-1))) {
953 man = (man>>(32-9)) + 1;
955 // no need to check for round up outside of range
967 // zero rate - not representable
969 if (r == round_down) {
978 PRINTD (DBG_QOS, "rate: man=%u, exp=%hu", man, exp);
981 *bits = /* (1<<14) | */ (exp<<9) | man;
985 ? (1 << exp) + (man << (exp-9))
986 : (1 << exp) + ((man + (1<<(9-exp-1))) >> (9-exp));
991 /********** Linux ATM Operations **********/
993 // some are not yet implemented while others do not make sense for
996 /********** Open a VC **********/
/* Open a VC on the adapter.
   Visible steps:
   - reject unspecified or out-of-range VPI/VCI and any AAL other than
     AAL5;
   - TX side: work out tx_vc_bits, tx_frame_bits and (through make_rate)
     tx_rate_bits from the TX traffic class and the PCR goal;
   - RX side: pick the first RX pool whose buffer_size can hold
     rxtp->max_sdu, failing when none is large enough;
   - allocate the driver-private amb_vcc and store it in
     atm_vcc->dev_data, then set ATM_VF_ADDR;
   - under dev->vcc_sf, for TX: modify rate and flags if a receiver is
     already registered on this VCI, otherwise issue SRB_OPEN_VC; mark
     dev->txer[vci].tx_present;
   - under dev->vcc_sf, for RX: grow the pool's buffers_wanted (starting
     from rx_lats when it was zero), refill the pool, switch the VC to the
     chosen pool or open it, and publish atm_vcc in dev->rxer[vci];
   - set ATM_VF_READY.
   Every command is retried while command_do reports failure.  Many lines
   (case labels, error returns, else branches, some declarations) are
   missing from this excerpt, so the exact control flow and the error
   codes returned must be taken from the complete file. */
998 static int amb_open (struct atm_vcc * atm_vcc)
1002 struct atm_qos * qos;
1003 struct atm_trafprm * txtp;
1004 struct atm_trafprm * rxtp;
1005 u16 tx_rate_bits = -1; // hush gcc
1006 u16 tx_vc_bits = -1; // hush gcc
1007 u16 tx_frame_bits = -1; // hush gcc
1009 amb_dev * dev = AMB_DEV(atm_vcc->dev);
1011 unsigned char pool = -1; // hush gcc
1012 short vpi = atm_vcc->vpi;
1013 int vci = atm_vcc->vci;
1015 PRINTD (DBG_FLOW|DBG_VCC, "amb_open %x %x", vpi, vci);
1017 #ifdef ATM_VPI_UNSPEC
1018 // UNSPEC is deprecated, remove this code eventually
1019 if (vpi == ATM_VPI_UNSPEC || vci == ATM_VCI_UNSPEC) {
1020 PRINTK (KERN_WARNING, "rejecting open with unspecified VPI/VCI (deprecated)");
1025 if (!(0 <= vpi && vpi < (1<<NUM_VPI_BITS) &&
1026 0 <= vci && vci < (1<<NUM_VCI_BITS))) {
1027 PRINTD (DBG_WARN|DBG_VCC, "VPI/VCI out of range: %hd/%d", vpi, vci);
1031 qos = &atm_vcc->qos;
1033 if (qos->aal != ATM_AAL5) {
1034 PRINTD (DBG_QOS, "AAL not supported");
1038 // traffic parameters
1040 PRINTD (DBG_QOS, "TX:");
1042 if (txtp->traffic_class != ATM_NONE) {
1043 switch (txtp->traffic_class) {
1045 // we take "the PCR" as a rate-cap
1046 int pcr = atm_pcr_goal (txtp);
1050 tx_vc_bits = TX_UBR;
1051 tx_frame_bits = TX_FRAME_NOTCAP;
1060 error = make_rate (pcr, r, &tx_rate_bits, NULL);
1063 tx_vc_bits = TX_UBR_CAPPED;
1064 tx_frame_bits = TX_FRAME_CAPPED;
1070 pcr = atm_pcr_goal (txtp);
1071 PRINTD (DBG_QOS, "pcr goal = %d", pcr);
1076 // PRINTD (DBG_QOS, "request for non-UBR/ABR denied");
1077 PRINTD (DBG_QOS, "request for non-UBR denied");
1081 PRINTD (DBG_QOS, "tx_rate_bits=%hx, tx_vc_bits=%hx",
1082 tx_rate_bits, tx_vc_bits);
1085 PRINTD (DBG_QOS, "RX:");
1087 if (rxtp->traffic_class == ATM_NONE) {
1090 // choose an RX pool (arranged in increasing size)
1091 for (pool = 0; pool < NUM_RX_POOLS; ++pool)
1092 if ((unsigned int) rxtp->max_sdu <= dev->rxq[pool].buffer_size) {
1093 PRINTD (DBG_VCC|DBG_QOS|DBG_POOL, "chose pool %hu (max_sdu %u <= %u)",
1094 pool, rxtp->max_sdu, dev->rxq[pool].buffer_size);
1097 if (pool == NUM_RX_POOLS) {
1098 PRINTD (DBG_WARN|DBG_VCC|DBG_QOS|DBG_POOL,
1099 "no pool suitable for VC (RX max_sdu %d is too large)",
1104 switch (rxtp->traffic_class) {
1110 pcr = atm_pcr_goal (rxtp);
1111 PRINTD (DBG_QOS, "pcr goal = %d", pcr);
1116 // PRINTD (DBG_QOS, "request for non-UBR/ABR denied");
1117 PRINTD (DBG_QOS, "request for non-UBR denied");
1123 // get space for our vcc stuff
1124 vcc = kmalloc (sizeof(amb_vcc), GFP_KERNEL);
1126 PRINTK (KERN_ERR, "out of memory!");
1129 atm_vcc->dev_data = (void *) vcc;
1131 // no failures beyond this point
1133 // we are not really "immediately before allocating the connection
1134 // identifier in hardware", but it will just have to do!
1135 set_bit(ATM_VF_ADDR,&atm_vcc->flags);
1137 if (txtp->traffic_class != ATM_NONE) {
1140 vcc->tx_frame_bits = tx_frame_bits;
// vcc_sf serialises VC open/close so that the paired adapter commands
// below are not separated by another command on the same VC (reason 2 in
// the locking notes of the header)
1142 mutex_lock(&dev->vcc_sf);
1143 if (dev->rxer[vci]) {
1144 // RXer on the channel already, just modify rate...
1145 cmd.request = cpu_to_be32 (SRB_MODIFY_VC_RATE);
1146 cmd.args.modify_rate.vc = cpu_to_be32 (vci); // vpi 0
1147 cmd.args.modify_rate.rate = cpu_to_be32 (tx_rate_bits << SRB_RATE_SHIFT);
1148 while (command_do (dev, &cmd))
1150 // ... and TX flags, preserving the RX pool
1151 cmd.request = cpu_to_be32 (SRB_MODIFY_VC_FLAGS);
1152 cmd.args.modify_flags.vc = cpu_to_be32 (vci); // vpi 0
1153 cmd.args.modify_flags.flags = cpu_to_be32
1154 ( (AMB_VCC(dev->rxer[vci])->rx_info.pool << SRB_POOL_SHIFT)
1155 | (tx_vc_bits << SRB_FLAGS_SHIFT) );
1156 while (command_do (dev, &cmd))
1159 // no RXer on the channel, just open (with pool zero)
1160 cmd.request = cpu_to_be32 (SRB_OPEN_VC);
1161 cmd.args.open.vc = cpu_to_be32 (vci); // vpi 0
1162 cmd.args.open.flags = cpu_to_be32 (tx_vc_bits << SRB_FLAGS_SHIFT);
1163 cmd.args.open.rate = cpu_to_be32 (tx_rate_bits << SRB_RATE_SHIFT);
1164 while (command_do (dev, &cmd))
1167 dev->txer[vci].tx_present = 1;
1168 mutex_unlock(&dev->vcc_sf);
1171 if (rxtp->traffic_class != ATM_NONE) {
1174 vcc->rx_info.pool = pool;
1176 mutex_lock(&dev->vcc_sf);
1177 /* grow RX buffer pool */
// implements the "n ? n+k : 0" sizing from the header: the first VC on a
// pool starts from rx_lats, and every VC adds one buffer
1178 if (!dev->rxq[pool].buffers_wanted)
1179 dev->rxq[pool].buffers_wanted = rx_lats;
1180 dev->rxq[pool].buffers_wanted += 1;
1181 fill_rx_pool (dev, pool, GFP_KERNEL);
1183 if (dev->txer[vci].tx_present) {
1184 // TXer on the channel already
1185 // switch (from pool zero) to this pool, preserving the TX bits
1186 cmd.request = cpu_to_be32 (SRB_MODIFY_VC_FLAGS);
1187 cmd.args.modify_flags.vc = cpu_to_be32 (vci); // vpi 0
1188 cmd.args.modify_flags.flags = cpu_to_be32
1189 ( (pool << SRB_POOL_SHIFT)
1190 | (dev->txer[vci].tx_vc_bits << SRB_FLAGS_SHIFT) );
1192 // no TXer on the channel, open the VC (with no rate info)
1193 cmd.request = cpu_to_be32 (SRB_OPEN_VC);
1194 cmd.args.open.vc = cpu_to_be32 (vci); // vpi 0
1195 cmd.args.open.flags = cpu_to_be32 (pool << SRB_POOL_SHIFT);
1196 cmd.args.open.rate = cpu_to_be32 (0);
1198 while (command_do (dev, &cmd))
1200 // this link allows RX frames through
1201 dev->rxer[vci] = atm_vcc;
1202 mutex_unlock(&dev->vcc_sf);
1205 // indicate readiness
1206 set_bit(ATM_VF_READY,&atm_vcc->flags);
1211 /********** Close a VC **********/
/* Close a VC; the counterpart of amb_open.
   Visible steps:
   - clear ATM_VF_READY;
   - TX side, under dev->vcc_sf: set the rate to zero if a receiver is
     still registered on this VCI, otherwise close the VC; clear
     dev->txer[vci].tx_present;
   - RX side, under dev->vcc_sf: move the VC back to pool zero if a
     transmitter is still present, otherwise close it; clear
     dev->rxer[vci]; shrink the pool's buffers_wanted and drain the pool
     when the last VC using it has gone;
   - clear ATM_VF_ADDR.
   Every command is retried while command_do reports failure.  The else
   lines, the command declarations and the line that frees the amb_vcc are
   not visible in this excerpt. */
1213 static void amb_close (struct atm_vcc * atm_vcc) {
1214 amb_dev * dev = AMB_DEV (atm_vcc->dev);
1215 amb_vcc * vcc = AMB_VCC (atm_vcc);
1216 u16 vci = atm_vcc->vci;
1218 PRINTD (DBG_VCC|DBG_FLOW, "amb_close");
1220 // indicate unreadiness
1221 clear_bit(ATM_VF_READY,&atm_vcc->flags);
1224 if (atm_vcc->qos.txtp.traffic_class != ATM_NONE) {
1227 mutex_lock(&dev->vcc_sf);
1228 if (dev->rxer[vci]) {
1229 // RXer still on the channel, just modify rate... XXX not really needed
1230 cmd.request = cpu_to_be32 (SRB_MODIFY_VC_RATE);
1231 cmd.args.modify_rate.vc = cpu_to_be32 (vci); // vpi 0
1232 cmd.args.modify_rate.rate = cpu_to_be32 (0);
1233 // ... and clear TX rate flags (XXX to stop RM cell output?), preserving RX pool
1235 // no RXer on the channel, close channel
1236 cmd.request = cpu_to_be32 (SRB_CLOSE_VC);
1237 cmd.args.close.vc = cpu_to_be32 (vci); // vpi 0
1239 dev->txer[vci].tx_present = 0;
1240 while (command_do (dev, &cmd))
1242 mutex_unlock(&dev->vcc_sf);
1246 if (atm_vcc->qos.rxtp.traffic_class != ATM_NONE) {
1249 // this is (the?) one reason why we need the amb_vcc struct
1250 unsigned char pool = vcc->rx_info.pool;
1252 mutex_lock(&dev->vcc_sf);
1253 if (dev->txer[vci].tx_present) {
1254 // TXer still on the channel, just go to pool zero XXX not really needed
1255 cmd.request = cpu_to_be32 (SRB_MODIFY_VC_FLAGS);
1256 cmd.args.modify_flags.vc = cpu_to_be32 (vci); // vpi 0
1257 cmd.args.modify_flags.flags = cpu_to_be32
1258 (dev->txer[vci].tx_vc_bits << SRB_FLAGS_SHIFT);
1260 // no TXer on the channel, close the VC
1261 cmd.request = cpu_to_be32 (SRB_CLOSE_VC);
1262 cmd.args.close.vc = cpu_to_be32 (vci); // vpi 0
1264 // forget the rxer - no more skbs will be pushed
// NOTE(review): the comparison is on atm_vcc, but the message prints the
// driver-private "vcc" pointer under the label "vcc=" - confirm which
// pointer was meant
1265 if (atm_vcc != dev->rxer[vci])
1266 PRINTK (KERN_ERR, "%s vcc=%p rxer[vci]=%p",
1267 "arghhh! we're going to die!",
1268 vcc, dev->rxer[vci]);
1269 dev->rxer[vci] = NULL;
1270 while (command_do (dev, &cmd))
1273 /* shrink RX buffer pool */
// mirror of the growth in amb_open: once only the rx_lats base is left,
// no VC is using the pool, so it wants no buffers and is drained
1274 dev->rxq[pool].buffers_wanted -= 1;
1275 if (dev->rxq[pool].buffers_wanted == rx_lats) {
1276 dev->rxq[pool].buffers_wanted = 0;
1277 drain_rx_pool (dev, pool);
1279 mutex_unlock(&dev->vcc_sf);
1282 // free our structure
1285 // say the VPI/VCI is free again
1286 clear_bit(ATM_VF_ADDR,&atm_vcc->flags);
1291 /********** Send **********/
// Send: queue one skb for transmission on the VC behind atm_vcc.
// A tx_simple descriptor is allocated that describes the whole skb payload
// as a single fragment followed by an end marker, and a tx entry pointing
// at that descriptor is handed to the adapter with tx_give.
// The skb length is checked against the negotiated max_sdu and both the skb
// data and the descriptor must pass check_area; a check_area failure on the
// skb data bumps the tx_err statistic and returns -ENOMEM.
1293 static int amb_send (struct atm_vcc * atm_vcc, struct sk_buff * skb) {
1294 amb_dev * dev = AMB_DEV(atm_vcc->dev);
1295 amb_vcc * vcc = AMB_VCC(atm_vcc);
1296 u16 vc = atm_vcc->vci;
1297 unsigned int tx_len = skb->len;
1298 unsigned char * tx_data = skb->data;
1299 tx_simple * tx_descr;
// nothing is sent once the device has been flagged dead
1302 if (test_bit (dead, &dev->flags))
1305 PRINTD (DBG_FLOW|DBG_TX, "amb_send vc %x data %p len %u",
1306 vc, tx_data, tx_len);
1308 dump_skb (">>>", vc, skb);
// the VC must have been opened for transmission
1310 if (!dev->txer[vc].tx_present) {
1311 PRINTK (KERN_ERR, "attempt to send on RX-only VC %x", vc);
1315 // this is a driver private field so we have to set it ourselves,
1316 // despite the fact that we are _required_ to use it to check for a
1318 ATM_SKB(skb)->vcc = atm_vcc;
// enforce the maximum SDU size agreed for the TX direction
1320 if (skb->len > (size_t) atm_vcc->qos.txtp.max_sdu) {
1321 PRINTK (KERN_ERR, "sk_buff length greater than agreed max_sdu, dropping...");
// the payload must lie in memory that check_area accepts
1325 if (check_area (skb->data, skb->len)) {
1326 atomic_inc(&atm_vcc->stats->tx_err);
1327 return -ENOMEM; // ?
1330 // allocate memory for fragments
// NOTE(review): GFP_KERNEL may sleep - confirm this send path is never
// entered from atomic context
1331 tx_descr = kmalloc (sizeof(tx_simple), GFP_KERNEL);
1333 PRINTK (KERN_ERR, "could not allocate TX descriptor");
1336 if (check_area (tx_descr, sizeof(tx_simple))) {
1340 PRINTD (DBG_TX, "fragment list allocated at %p", tx_descr);
// remember the skb in the descriptor
1342 tx_descr->skb = skb;
// the one and only fragment: length and bus address in big-endian form
1344 tx_descr->tx_frag.bytes = cpu_to_be32 (tx_len);
1345 tx_descr->tx_frag.address = cpu_to_be32 (virt_to_bus (tx_data));
// end marker: the handle is the bus address of the descriptor itself
// (stored without byte swapping), the remaining fields are zeroed
1347 tx_descr->tx_frag_end.handle = virt_to_bus (tx_descr);
1348 tx_descr->tx_frag_end.vc = 0;
1349 tx_descr->tx_frag_end.next_descriptor_length = 0;
1350 tx_descr->tx_frag_end.next_descriptor = 0;
1351 #ifdef AMB_NEW_MICROCODE
1352 tx_descr->tx_frag_end.cpcs_uu = 0;
1353 tx_descr->tx_frag_end.cpi = 0;
1354 tx_descr->tx_frag_end.pad = 0;
// TX queue entry: per-VC frame bits ORed with the VCI, plus the length and
// bus address of the fragment list that starts at tx_frag
1357 tx.vc = cpu_to_be16 (vcc->tx_frame_bits | vc);
1358 tx.tx_descr_length = cpu_to_be16 (sizeof(tx_frag)+sizeof(tx_frag_end));
1359 tx.tx_descr_addr = cpu_to_be32 (virt_to_bus (&tx_descr->tx_frag));
// repeat for as long as tx_give returns non-zero
1361 while (tx_give (dev, &tx))
1366 /********** Change QoS on a VC **********/
1368 // int amb_change_qos (struct atm_vcc * atm_vcc, struct atm_qos * qos, int flags);
1370 /********** Free RX Socket Buffer **********/
// Free RX socket buffer: try to give a used RX skb straight back to the
// RX pool recorded in the vcc (via rx_give); if rx_give does not return
// zero the skb is released with dev_kfree_skb_any instead.
1373 static void amb_free_rx_skb (struct atm_vcc * atm_vcc, struct sk_buff * skb) {
1374 amb_dev * dev = AMB_DEV (atm_vcc->dev);
1375 amb_vcc * vcc = AMB_VCC (atm_vcc);
1376 unsigned char pool = vcc->rx_info.pool;
1379 // This may be unsafe for various reasons that I cannot really guess
1380 // at. However, I note that the ATM layer calls kfree_skb rather
1381 // than dev_kfree_skb at this point so we are at least covered as far
1382 // as buffer locking goes. There may be bugs if pcap clones RX skbs.
1384 PRINTD (DBG_FLOW|DBG_SKB, "amb_rx_free skb %p (atm_vcc %p, vcc %p)",
// RX entry: the handle is the bus address of the skb, host_address the bus
// address of its data area
// NOTE(review): host_address is taken from skb->data before skb->data is
// reset to skb->head just below - confirm this ordering is intended
1387 rx.handle = virt_to_bus (skb);
1388 rx.host_address = cpu_to_be32 (virt_to_bus (skb->data));
// rewind the skb so that it looks unused again
1390 skb->data = skb->head;
1391 skb_reset_tail_pointer(skb);
// rx_give returning zero means the buffer was accepted by the pool
1394 if (!rx_give (dev, &rx, pool)) {
1396 PRINTD (DBG_SKB|DBG_POOL, "recycled skb for pool %hu", pool);
1400 // just do what the ATM layer would have done
1401 dev_kfree_skb_any (skb);
1407 /********** Proc File Output **********/
// Proc file output: formats driver statistics as text into page.
// Each section below produces one line with sprintf; the sections that
// return directly hand back the number of characters written.
// NOTE(review): presumably *pos selects which section is produced on a
// given call - the selection code is not visible here, confirm.
1409 static int amb_proc_read (struct atm_dev * atm_dev, loff_t * pos, char * page) {
1410 amb_dev * dev = AMB_DEV (atm_dev);
1414 PRINTD (DBG_FLOW, "amb_proc_read");
1416 /* more diagnostics here? */
// frame counters kept in dev->stats
1419 amb_stats * s = &dev->stats;
1420 return sprintf (page,
1421 "frames: TX OK %lu, RX OK %lu, RX bad %lu "
1422 "(CRC %lu, long %lu, aborted %lu, unused %lu).\n",
1423 s->tx_ok, s->rx.ok, s->rx.error,
1424 s->rx.badcrc, s->rx.toolong,
1425 s->rx.aborted, s->rx.unused);
// command queue occupancy: current, high-water mark, capacity
1429 amb_cq * c = &dev->cq;
1430 return sprintf (page, "cmd queue [cur/hi/max]: %u/%u/%u. ",
1431 c->pending, c->high, c->maximum);
// TX queue occupancy
1435 amb_txq * t = &dev->txq;
1436 return sprintf (page, "TX queue [cur/max high full]: %u/%u %u %u.\n",
1437 t->pending, t->maximum, t->high, t->filled);
// one entry per RX pool, appended to the same line; count tracks the
// number of characters written so far
1441 unsigned int count = sprintf (page, "RX queues [cur/max/req low empty]:");
1442 for (pool = 0; pool < NUM_RX_POOLS; ++pool) {
1443 amb_rxq * r = &dev->rxq[pool];
1444 count += sprintf (page+count, " %u/%u/%u %u %u",
1445 r->pending, r->maximum, r->buffers_wanted, r->low, r->emptied);
1447 count += sprintf (page+count, ".\n");
// buffer size configured for each RX pool
1452 unsigned int count = sprintf (page, "RX buffer sizes:");
1453 for (pool = 0; pool < NUM_RX_POOLS; ++pool) {
1454 amb_rxq * r = &dev->rxq[pool];
1455 count += sprintf (page+count, " %u", r->buffer_size);
1457 count += sprintf (page+count, ".\n");
1470 /********** Operation Structure **********/
// ATM device operations for this driver; amb_probe passes this table to
// atm_dev_register.
1472 static const struct atmdev_ops amb_ops = {
1476 .proc_read = amb_proc_read,
1477 .owner = THIS_MODULE,
1480 /********** housekeeping **********/
// Housekeeping timer callback: tops up the RX pools and re-arms the timer
// so that it fires again ten seconds later.
1481 static void do_housekeeping (struct timer_list *t) {
// recover the owning amb_dev from its embedded housekeeping timer
1482 amb_dev * dev = from_timer(dev, t, housekeeping);
1484 // could collect device-specific (not driver/atm-linux) stats here
1486 // last resort refill once every ten seconds
1487 fill_rx_pools (dev);
1488 mod_timer(&dev->housekeeping, jiffies + 10*HZ);
1493 /********** creation of communication queues **********/
// Create the host-side communication queues.
// One kmalloc area is sized to hold, in this order: the command queue
// (cmds entries), the TX in and out queues (txs entries each) and, for each
// RX pool, an in and an out queue (rxs[pool] entries each). The area is
// then carved up by advancing memory past each queue in turn.
//   cmds            - number of command queue entries
//   txs             - number of TX queue entries
//   rxs             - per-pool number of RX queue entries
//   rx_buffer_sizes - per-pool RX buffer size, recorded in each rxq
// After carving, memory is expected to equal limit; otherwise the area is
// freed again and an error is logged.
1495 static int create_queues(amb_dev *dev, unsigned int cmds, unsigned int txs,
1496 unsigned int *rxs, unsigned int *rx_buffer_sizes)
1503 PRINTD (DBG_FLOW, "create_queues %p", dev);
// add up the space needed by all queues
1505 total += cmds * sizeof(command);
1507 total += txs * (sizeof(tx_in) + sizeof(tx_out));
1509 for (pool = 0; pool < NUM_RX_POOLS; ++pool)
1510 total += rxs[pool] * (sizeof(rx_in) + sizeof(rx_out));
1512 memory = kmalloc (total, GFP_KERNEL);
1514 PRINTK (KERN_ERR, "could not allocate queues");
1517 if (check_area (memory, total)) {
1518 PRINTK (KERN_ERR, "queues allocated in nasty area");
// limit marks the end of the area; used for the consistency check below
1523 limit = memory + total;
1524 PRINTD (DBG_INIT, "queues from %p to %p", memory, limit);
1526 PRINTD (DBG_CMD, "command queue at %p", memory);
// command queue sits at the very start of the area
1529 command * cmd = memory;
1530 amb_cq * cq = &dev->cq;
1534 cq->maximum = cmds - 1;
1536 cq->ptrs.start = cmd;
1539 cq->ptrs.limit = cmd + cmds;
1541 memory = cq->ptrs.limit;
1544 PRINTD (DBG_TX, "TX queue pair at %p", memory);
// TX in queue followed directly by the TX out queue
1547 tx_in * in = memory;
1549 amb_txq * txq = &dev->txq;
1554 txq->maximum = txs - 1;
1558 txq->in.limit = in + txs;
1560 memory = txq->in.limit;
1563 txq->out.start = out;
1565 txq->out.limit = out + txs;
1567 memory = txq->out.limit;
1570 PRINTD (DBG_RX, "RX queue pairs at %p", memory);
// per pool: RX in queue followed directly by the RX out queue
1572 for (pool = 0; pool < NUM_RX_POOLS; ++pool) {
1573 rx_in * in = memory;
1575 amb_rxq * rxq = &dev->rxq[pool];
1577 rxq->buffer_size = rx_buffer_sizes[pool];
1578 rxq->buffers_wanted = 0;
1581 rxq->low = rxs[pool] - 1;
1583 rxq->maximum = rxs[pool] - 1;
1587 rxq->in.limit = in + rxs[pool];
1589 memory = rxq->in.limit;
1592 rxq->out.start = out;
1594 rxq->out.limit = out + rxs[pool];
1596 memory = rxq->out.limit;
// consistency check: carving must have consumed exactly the whole area
1599 if (memory == limit) {
1602 PRINTK (KERN_ERR, "bad queue alloc %p != %p (tell maintainer)", memory, limit);
// limit - total is the original base address of the area
1603 kfree (limit - total);
1609 /********** destruction of communication queues **********/
// Destroy the communication queues set up by create_queues.
// The command queue is placed first in the shared area, so its start
// pointer is also the base address of the whole area.
1611 static void destroy_queues (amb_dev * dev) {
1612 // all queues assumed empty
1613 void * memory = dev->cq.ptrs.start;
1614 // includes txq.in, txq.out, rxq[].in and rxq[].out
1616 PRINTD (DBG_FLOW, "destroy_queues %p", dev);
1618 PRINTD (DBG_INIT, "freeing queues at %p", memory);
1624 /********** basic loader commands and error handling **********/
1625 // centisecond timeouts - guessing away here
// Timeout for each loader command, indexed by command code, in
// centiseconds; do_loader_command multiplies the value by 10 before
// passing it to msleep_interruptible.
1626 static unsigned int command_timeouts [] = {
1627 [host_memory_test] = 15,
1628 [read_adapter_memory] = 2,
1629 [write_adapter_memory] = 2,
1630 [adapter_start] = 50,
1631 [get_version_number] = 10,
1632 [interrupt_host] = 1,
1633 [flash_erase_sector] = 1,
1634 [adap_download_block] = 1,
1635 [adap_erase_flash] = 1,
1636 [adap_run_in_iram] = 1,
1637 [adap_end_download] = 1
// Result code that counts as success for each loader command, indexed by
// command code; decode_loader_result compares the adapter result with it.
1641 static unsigned int command_successes [] = {
1642 [host_memory_test] = COMMAND_PASSED_TEST,
1643 [read_adapter_memory] = COMMAND_READ_DATA_OK,
1644 [write_adapter_memory] = COMMAND_WRITE_DATA_OK,
1645 [adapter_start] = COMMAND_COMPLETE,
1646 [get_version_number] = COMMAND_COMPLETE,
1647 [interrupt_host] = COMMAND_COMPLETE,
1648 [flash_erase_sector] = COMMAND_COMPLETE,
1649 [adap_download_block] = COMMAND_COMPLETE,
1650 [adap_erase_flash] = COMMAND_COMPLETE,
1651 [adap_run_in_iram] = COMMAND_COMPLETE,
1652 [adap_end_download] = COMMAND_COMPLETE
// Interpret the result of a loader command.
// The result is first compared with the success code registered for cmd in
// command_successes. Any other result is mapped to a human-readable message
// (unrecognised codes become the unknown error message and are also
// reported through PRINTD) and the message is logged with PRINTK.
//   cmd    - loader command that was executed
//   result - result code reported by the adapter, in CPU byte order
1655 static int decode_loader_result (loader_command cmd, u32 result)
1660 if (result == command_successes[cmd])
1666 msg = "bad command";
1668 case COMMAND_IN_PROGRESS:
1670 msg = "command in progress";
1672 case COMMAND_PASSED_TEST:
1674 msg = "command passed test";
1676 case COMMAND_FAILED_TEST:
1678 msg = "command failed test";
1680 case COMMAND_READ_DATA_OK:
1682 msg = "command read data ok";
1684 case COMMAND_READ_BAD_ADDRESS:
1686 msg = "command read bad address";
1688 case COMMAND_WRITE_DATA_OK:
1690 msg = "command write data ok";
1692 case COMMAND_WRITE_BAD_ADDRESS:
1694 msg = "command write bad address";
1696 case COMMAND_WRITE_FLASH_FAILURE:
1698 msg = "command write flash failure";
1700 case COMMAND_COMPLETE:
1702 msg = "command complete";
1704 case COMMAND_FLASH_ERASE_FAILURE:
1706 msg = "command flash erase failure";
1708 case COMMAND_WRITE_BAD_DATA:
1710 msg = "command write bad data";
1714 msg = "unknown error";
1715 PRINTD (DBG_LOAD|DBG_ERR,
1716 "decode_loader_result got %d=%x !",
1721 PRINTK (KERN_ERR, "%s", msg);
// Execute one loader command and wait for its result.
// The command code and the DMA_VALID marker are written into the loader
// block, the doorbell register is written with the bus address of the
// block, and the result field is polled until the adapter reports
// something other than COMMAND_IN_PROGRESS or the per-command timeout
// runs out. The adapter_start command additionally waits for the doorbell
// register to read back as zero. The final result is translated by
// decode_loader_result.
//   lb  - loader block shared with the adapter (payload already filled in)
//   dev - device to talk to
1725 static int do_loader_command(volatile loader_block *lb, const amb_dev *dev,
1729 unsigned long timeout;
1731 PRINTD (DBG_FLOW|DBG_LOAD, "do_loader_command");
1735 Set the return value to zero, set the command type and set the
1736 valid entry to the right magic value. The payload is already
1737 correctly byte-ordered so we leave it alone. Hit the doorbell
1738 with the bus address of this structure.
1743 lb->command = cpu_to_be32 (cmd);
1744 lb->valid = cpu_to_be32 (DMA_VALID);
1745 // dump_registers (dev);
1746 // dump_loader_block (lb);
// the onegigmask bits are stripped from the address written here;
// fixup_plx_window puts those bits into the window register instead
1747 wr_mem (dev, offsetof(amb_mem, doorbell), virt_to_bus (lb) & ~onegigmask);
// table entries are centiseconds, the sleep below takes milliseconds
1749 timeout = command_timeouts[cmd] * 10;
// poll the result field until the adapter has finished the command
1751 while (!lb->result || lb->result == cpu_to_be32 (COMMAND_IN_PROGRESS))
// msleep_interruptible hands back the time that is still left
1753 timeout = msleep_interruptible(timeout);
1755 PRINTD (DBG_LOAD|DBG_ERR, "command %d timed out", cmd);
1756 dump_registers (dev);
1757 dump_loader_block (lb);
1761 if (cmd == adapter_start) {
1762 // wait for start command to acknowledge...
1764 while (rd_plain (dev, offsetof(amb_mem, doorbell)))
1766 timeout = msleep_interruptible(timeout);
1768 PRINTD (DBG_LOAD|DBG_ERR, "start command did not clear doorbell, res=%08x",
1769 be32_to_cpu (lb->result));
1770 dump_registers (dev);
1775 return decode_loader_result (cmd, be32_to_cpu (lb->result));
1780 /* loader: determine loader version */
// Loader: issue get_version_number and store the version reported in the
// loader block payload, converted to CPU byte order, in *version.
1782 static int get_loader_version(loader_block *lb, const amb_dev *dev,
1787 PRINTD (DBG_FLOW|DBG_LOAD, "get_loader_version");
1789 res = do_loader_command (lb, dev, get_version_number);
1793 *version = be32_to_cpu (lb->payload.version);
1797 /* loader: write memory data blocks */
// Loader: write one firmware record into adapter memory.
// The record address is copied unchanged, the count is the record length
// divided by four (converted to big-endian), and the record data is copied
// into the transfer block before write_adapter_memory is issued.
//   rec - firmware record; len is big-endian 16 bit
// Returns the result of do_loader_command.
1799 static int loader_write(loader_block *lb, const amb_dev *dev,
1800 const struct ihex_binrec *rec)
1802 transfer_block * tb = &lb->payload.transfer;
1804 PRINTD (DBG_FLOW|DBG_LOAD, "loader_write");
1806 tb->address = rec->addr;
1807 tb->count = cpu_to_be32(be16_to_cpu(rec->len) / 4);
// ucode_init rejects records longer than 4 * MAX_TRANSFER_DATA bytes
// before calling this function
1808 memcpy(tb->data, rec->data, be16_to_cpu(rec->len));
1809 return do_loader_command (lb, dev, write_adapter_memory);
1812 /* loader: verify memory data blocks */
// Loader: verify one firmware record by reading the same address range
// back with read_adapter_memory and comparing the returned data with the
// record contents.
1814 static int loader_verify(loader_block *lb, const amb_dev *dev,
1815 const struct ihex_binrec *rec)
1817 transfer_block * tb = &lb->payload.transfer;
1820 PRINTD (DBG_FLOW|DBG_LOAD, "loader_verify");
1822 tb->address = rec->addr;
1823 tb->count = cpu_to_be32(be16_to_cpu(rec->len) / 4);
1824 res = do_loader_command (lb, dev, read_adapter_memory);
// the read succeeded but the data differs from what was written
1825 if (!res && memcmp(tb->data, rec->data, be16_to_cpu(rec->len)))
1830 /* loader: start microcode */
// Loader: start the microcode at the given address by placing the address
// (big-endian) in the payload and issuing adapter_start.
// Returns the result of do_loader_command.
1832 static int loader_start(loader_block *lb, const amb_dev *dev, u32 address)
1834 PRINTD (DBG_FLOW|DBG_LOAD, "loader_start");
1836 lb->payload.start = cpu_to_be32 (address);
1837 return do_loader_command (lb, dev, adapter_start);
1840 /********** reset card **********/
// Log one self-test failure; msg names the part that failed.
1842 static inline void sf (const char * msg)
1844 PRINTK (KERN_ERR, "self-test failed: %s", msg);
// Reset the card.
// The reset bits are set in reset_control, the interrupt registers are put
// into a known state, the loader ready flag is cleared and the reset bits
// are cleared again. The code that follows waits for the loader ready flag
// and reports every failure bit found in the self-test result word.
// NOTE(review): presumably the wait and the self-test evaluation only run
// when diags is non-zero - the guarding condition is not visible here.
1847 static int amb_reset (amb_dev * dev, int diags) {
1850 PRINTD (DBG_FLOW|DBG_LOAD, "amb_reset");
1852 word = rd_plain (dev, offsetof(amb_mem, reset_control));
1853 // put card into reset state
1854 wr_plain (dev, offsetof(amb_mem, reset_control), word | AMB_RESET_BITS);
1855 // wait a short while
1858 // put card into known good state
1859 wr_plain (dev, offsetof(amb_mem, interrupt_control), AMB_DOORBELL_BITS);
1860 // clear all interrupts just in case
1861 wr_plain (dev, offsetof(amb_mem, interrupt), -1);
1863 // clear self-test done flag
1864 wr_plain (dev, offsetof(amb_mem, mb.loader.ready), 0);
1865 // take card out of reset state
1866 wr_plain (dev, offsetof(amb_mem, reset_control), word &~ AMB_RESET_BITS);
1869 unsigned long timeout;
1872 // half second time-out
// poll until the loader sets its ready flag
1874 while (!rd_plain (dev, offsetof(amb_mem, mb.loader.ready)))
1876 timeout = msleep_interruptible(timeout);
1878 PRINTD (DBG_LOAD|DBG_ERR, "reset timed out");
1882 // get results of self-test
1883 // XXX double check byte-order
1884 word = rd_mem (dev, offsetof(amb_mem, mb.loader.result));
// report each failure bit that is set in the result word
1885 if (word & SELF_TEST_FAILURE) {
1886 if (word & GPINT_TST_FAILURE)
1888 if (word & SUNI_DATA_PATTERN_FAILURE)
1889 sf ("SUNI data pattern");
1890 if (word & SUNI_DATA_BITS_FAILURE)
1891 sf ("SUNI data bits");
1892 if (word & SUNI_UTOPIA_FAILURE)
1893 sf ("SUNI UTOPIA interface");
1894 if (word & SUNI_FIFO_FAILURE)
1895 sf ("SUNI cell buffer FIFO");
1896 if (word & SRAM_FAILURE)
1898 // better return value?
1906 /********** transfer and start the microcode **********/
// Transfer and start the microcode.
// The firmware image atmsar11.fw is requested in ihex record form. Its
// first record must have address zero and a four byte payload holding the
// start address. Every following record is checked (at most
// 4 * MAX_TRANSFER_DATA bytes, length a multiple of four), written to the
// adapter and verified. The firmware is released once all records are
// processed and the microcode is started at the start address. Malformed
// images are reported with the reason held in errmsg.
1908 static int ucode_init(loader_block *lb, amb_dev *dev)
1910 const struct firmware *fw;
1911 unsigned long start_address;
1912 const struct ihex_binrec *rec;
1913 const char *errmsg = NULL;
1916 res = request_ihex_firmware(&fw, "atmsar11.fw", &dev->pci_dev->dev);
1918 PRINTK (KERN_ERR, "Cannot load microcode data");
1922 /* First record contains just the start address */
1923 rec = (const struct ihex_binrec *)fw->data;
1924 if (be16_to_cpu(rec->len) != sizeof(__be32) || be32_to_cpu(rec->addr)) {
1925 errmsg = "no start record";
1928 start_address = be32_to_cpup((__be32 *)rec->data);
// move on to the first data record
1930 rec = ihex_next_binrec(rec);
1932 PRINTD (DBG_FLOW|DBG_LOAD, "ucode_init");
1935 PRINTD (DBG_LOAD, "starting region (%x, %u)", be32_to_cpu(rec->addr),
1936 be16_to_cpu(rec->len));
// a record must fit into the data area of one transfer block
1937 if (be16_to_cpu(rec->len) > 4 * MAX_TRANSFER_DATA) {
1938 errmsg = "record too long";
// data is transferred in units of four bytes
1941 if (be16_to_cpu(rec->len) & 3) {
1942 errmsg = "odd number of bytes";
1945 res = loader_write(lb, dev, rec);
1949 res = loader_verify(lb, dev, rec);
1952 rec = ihex_next_binrec(rec);
1954 release_firmware(fw);
1956 res = loader_start(lb, dev, start_address);
// error exit for malformed firmware images
1960 release_firmware(fw);
1961 PRINTK(KERN_ERR, "Bad microcode data (%s)", errmsg);
1965 /********** give adapter parameters **********/
// Convert a kernel virtual address into a big-endian 32 bit bus address.
1967 static inline __be32 bus_addr(void * addr) {
1968 return cpu_to_be32 (virt_to_bus (addr));
// Give the adapter its parameters.
// A descriptor a is filled with the bus addresses of the command queue,
// the TX in and out queues and, per RX pool, the in and out queues and the
// buffer size. The bus address of the descriptor is written to the
// doorbell register and the function then waits for the doorbell register
// to read back as zero.
1971 static int amb_talk(amb_dev *dev)
1975 unsigned long timeout;
1977 PRINTD (DBG_FLOW, "amb_talk %p", dev);
1979 a.command_start = bus_addr (dev->cq.ptrs.start);
1980 a.command_end = bus_addr (dev->cq.ptrs.limit);
1981 a.tx_start = bus_addr (dev->txq.in.start);
1982 a.tx_end = bus_addr (dev->txq.in.limit);
1983 a.txcom_start = bus_addr (dev->txq.out.start);
1984 a.txcom_end = bus_addr (dev->txq.out.limit);
1986 for (pool = 0; pool < NUM_RX_POOLS; ++pool) {
1987 // the other "a" items are set up by the adapter
1988 a.rec_struct[pool].buffer_start = bus_addr (dev->rxq[pool].in.start);
1989 a.rec_struct[pool].buffer_end = bus_addr (dev->rxq[pool].in.limit);
1990 a.rec_struct[pool].rx_start = bus_addr (dev->rxq[pool].out.start);
1991 a.rec_struct[pool].rx_end = bus_addr (dev->rxq[pool].out.limit);
1992 a.rec_struct[pool].buffer_size = cpu_to_be32 (dev->rxq[pool].buffer_size);
1995 #ifdef AMB_NEW_MICROCODE
1996 // disable fast PLX prefetching
2000 // pass the structure
2001 wr_mem (dev, offsetof(amb_mem, doorbell), virt_to_bus (&a));
2003 // 2.2 second wait (must not touch doorbell during 2 second DMA test)
2005 // give the adapter another half second?
// poll until the adapter has cleared the doorbell register
2007 while (rd_plain (dev, offsetof(amb_mem, doorbell)))
2009 timeout = msleep_interruptible(timeout);
2011 PRINTD (DBG_INIT|DBG_ERR, "adapter init timed out");
2018 // get microcode version
// Query the microcode version with SRB_GET_VERSION and log it as
// major.minor.
2019 static void amb_ucode_version(amb_dev *dev)
2024 cmd.request = cpu_to_be32 (SRB_GET_VERSION);
// repeat for as long as command_do returns non-zero
2025 while (command_do (dev, &cmd)) {
2026 set_current_state(TASK_UNINTERRUPTIBLE);
// the adapter answers in big-endian form
2029 major = be32_to_cpu (cmd.args.version.major);
2030 minor = be32_to_cpu (cmd.args.version.minor);
2031 PRINTK (KERN_INFO, "microcode version is %u.%u", major, minor);
2034 // get end station address
// Read the end station address with SRB_GET_BIA and store it in esi.
// The adapter returns the address as two big-endian words (lower4 and
// upper2); every byte is bit-reversed with bitrev8 before it is stored.
//   esi - output buffer; the visible loop fills ESI_LEN bytes
2035 static void amb_esi(amb_dev *dev, u8 *esi)
2041 cmd.request = cpu_to_be32 (SRB_GET_BIA);
// repeat for as long as command_do returns non-zero
2042 while (command_do (dev, &cmd)) {
2043 set_current_state(TASK_UNINTERRUPTIBLE);
2046 lower4 = be32_to_cpu (cmd.args.bia.lower4);
2047 upper2 = be32_to_cpu (cmd.args.bia.upper2);
2048 PRINTD (DBG_LOAD, "BIA: lower4: %08x, upper2 %04x", lower4, upper2);
2053 PRINTDB (DBG_INIT, "ESI:");
2054 for (i = 0; i < ESI_LEN; ++i) {
// byte i of lower4, least significant byte first
2056 esi[i] = bitrev8(lower4>>(8*i));
// byte i-4 of upper2 (presumably used for i of four and above)
2058 esi[i] = bitrev8(upper2>>(8*(i-4)));
2059 PRINTDM (DBG_INIT, " %02x", esi[i]);
2062 PRINTDE (DBG_INIT, "");
// Point the PLX-mapped window at the loader block: the onegigmask bits of
// the register at stuff[10] are replaced by the matching bits of the bus
// address of lb, all other register bits are kept.
2068 static void fixup_plx_window (amb_dev *dev, loader_block *lb)
2070 // fix up the PLX-mapped window base address to match the block
2073 blb = virt_to_bus(lb);
2074 // the kernel stack had better not ever cross a 1Gb boundary!
// read-modify-write of the window register
2075 mapreg = rd_plain (dev, offsetof(amb_mem, stuff[10]));
2076 mapreg &= ~onegigmask;
2077 mapreg |= blb & onegigmask;
2078 wr_plain (dev, offsetof(amb_mem, stuff[10]), mapreg);
// Bring the adapter up. The steps are chained so that each one only runs
// when the previous one succeeded: reset with diagnostics, PLX window
// fix-up, loader version query, microcode download and start, queue
// creation, queue hand-over to the adapter (amb_talk) and finally the
// microcode version query. destroy_queues is called again on the failure
// path after the queues were created.
2082 static int amb_init(amb_dev *dev)
2088 if (amb_reset (dev, 1)) {
2089 PRINTK (KERN_ERR, "card reset failed!");
// lb is a local loader block shared by the loader commands below
2091 fixup_plx_window (dev, &lb);
2093 if (get_loader_version (&lb, dev, &version)) {
2094 PRINTK (KERN_INFO, "failed to get loader version");
2096 PRINTK (KERN_INFO, "loader version is %08x", version);
2098 if (ucode_init (&lb, dev)) {
2099 PRINTK (KERN_ERR, "microcode failure");
// queue sizes come from the module parameters cmds, txs, rxs and rxs_bs
2100 } else if (create_queues (dev, cmds, txs, rxs, rxs_bs)) {
2101 PRINTK (KERN_ERR, "failed to get memory for queues");
2104 if (amb_talk (dev)) {
2105 PRINTK (KERN_ERR, "adapter did not accept queues");
2108 amb_ucode_version (dev);
2113 destroy_queues (dev);
2114 } /* create_queues, ucode_init */
2117 } /* get_loader_version */
// Initialise the software state of a freshly allocated amb_dev: link it
// with its pci_dev in both directions, record IO base, IRQ and memory
// base, set the available TX and RX cell rates and initialise the mutex
// and the queue spinlocks.
2124 static void setup_dev(amb_dev *dev, struct pci_dev *pci_dev)
2128 // set up known dev items straight away
2129 dev->pci_dev = pci_dev;
2130 pci_set_drvdata(pci_dev, dev);
// PCI resource 1 supplies the IO base, resource 0 the memory base
2132 dev->iobase = pci_resource_start (pci_dev, 1);
2133 dev->irq = pci_dev->irq;
2134 dev->membase = bus_to_virt(pci_resource_start(pci_dev, 0));
2136 // flags (currently only dead)
2139 // Allocate cell rates (fibre)
2140 // ATM_OC3_PCR = 1555200000/8/270*260/53 - 29/53
2141 // to be really pedantic, this should be ATM_OC3c_PCR
2142 dev->tx_avail = ATM_OC3_PCR;
2143 dev->rx_avail = ATM_OC3_PCR;
2145 // semaphore for txer/rxer modifications - we cannot use a
2146 // spinlock as the critical region needs to switch processes
2147 mutex_init(&dev->vcc_sf);
2148 // queue manipulation spinlocks; we want atomic reads and
2149 // writes to the queue descriptors (handles IRQ and SMP)
2150 // consider replacing "int pending" -> "atomic_t available"
2151 // => problem related to who gets to move queue pointers
2152 spin_lock_init (&dev->cq.lock);
2153 spin_lock_init (&dev->txq.lock);
2154 for (pool = 0; pool < NUM_RX_POOLS; ++pool)
2155 spin_lock_init (&dev->rxq[pool].lock);
// Configure the PCI side of the device: enable bus mastering and, when the
// chosen latency differs from the current one, rewrite the PCI latency
// timer.
2158 static void setup_pci_dev(struct pci_dev *pci_dev)
2162 // enable bus master accesses
2163 pci_set_master(pci_dev);
2165 // frobnicate latency (upwards, usually)
2166 pci_read_config_byte (pci_dev, PCI_LATENCY_TIMER, &lat);
// never go below MIN_PCI_LATENCY, otherwise keep the current value
// NOTE(review): presumably this default only applies when the pci_lat
// module parameter was left unset - the guarding condition is not visible
2169 pci_lat = (lat < MIN_PCI_LATENCY) ? MIN_PCI_LATENCY : lat;
2171 if (lat != pci_lat) {
2172 PRINTK (KERN_INFO, "Changing PCI latency timer from %hu to %hu",
2174 pci_write_config_byte(pci_dev, PCI_LATENCY_TIMER, pci_lat);
// PCI probe: bring up one Ambassador card.
// Steps, in order: enable the PCI device, reject the known-bad device ID
// (PLX rev 2), reserve PCI region 1, allocate and set up the amb_dev,
// initialise the adapter (amb_init), configure bus mastering and latency,
// install the shared interrupt handler, register the ATM device, read the
// ESI, publish the VPI/VCI ranges, start the housekeeping timer and enable
// host interrupts. The lines at the end release the PCI region and disable
// the PCI device again.
//   pci_dev - device being probed
//   pci_ent - matching entry of amb_pci_tbl (not used in the visible code)
2178 static int amb_probe(struct pci_dev *pci_dev,
2179 const struct pci_device_id *pci_ent)
2185 err = pci_enable_device(pci_dev);
// report the real cause here; the PLX rev 2 message belongs only to the
// device ID check further down
2187 PRINTK (KERN_ERR, "failed to enable PCI device");
2191 // read resources from PCI configuration space
2194 if (pci_dev->device == PCI_DEVICE_ID_MADGE_AMBASSADOR_BAD) {
2195 PRINTK (KERN_ERR, "skipped broken (PLX rev 2) card");
2200 PRINTD (DBG_INFO, "found Madge ATM adapter (amb) at"
2201 " IO %llx, IRQ %u, MEM %p",
2202 (unsigned long long)pci_resource_start(pci_dev, 1),
2203 irq, bus_to_virt(pci_resource_start(pci_dev, 0)));
// reserve the IO region (PCI resource 1) for this driver
2206 err = pci_request_region(pci_dev, 1, DEV_LABEL);
2208 PRINTK (KERN_ERR, "IO range already in use!");
// zero-initialised per-device state
2212 dev = kzalloc(sizeof(amb_dev), GFP_KERNEL);
2214 PRINTK (KERN_ERR, "out of memory!");
2219 setup_dev(dev, pci_dev);
2221 err = amb_init(dev);
2223 PRINTK (KERN_ERR, "adapter initialisation failure");
2227 setup_pci_dev(pci_dev);
2229 // grab (but share) IRQ and install handler
2230 err = request_irq(irq, interrupt_handler, IRQF_SHARED, DEV_LABEL, dev);
2232 PRINTK (KERN_ERR, "request IRQ failed!");
2236 dev->atm_dev = atm_dev_register (DEV_LABEL, &pci_dev->dev, &amb_ops, -1,
2238 if (!dev->atm_dev) {
2239 PRINTD (DBG_ERR, "failed to register Madge ATM adapter");
2244 PRINTD (DBG_INFO, "registered Madge ATM adapter (no. %d) (%p) at %p",
2245 dev->atm_dev->number, dev, dev->atm_dev);
// let the ATM layer callbacks find the amb_dev again
2246 dev->atm_dev->dev_data = (void *) dev;
2248 // register our address
2249 amb_esi (dev, dev->atm_dev->esi);
2251 // 0 bits for vpi, 10 bits for vci
2252 dev->atm_dev->ci_range.vpi_bits = NUM_VPI_BITS;
2253 dev->atm_dev->ci_range.vci_bits = NUM_VCI_BITS;
// first housekeeping run is due immediately; the callback re-arms itself
2255 timer_setup(&dev->housekeeping, do_housekeeping, 0);
2256 mod_timer(&dev->housekeeping, jiffies);
2258 // enable host interrupts
2259 interrupts_on (dev);
// unwinding: give back the PCI region and disable the device
2271 pci_release_region(pci_dev, 1);
2273 pci_disable_device(pci_dev);
// PCI remove: shut down one card. The housekeeping timer is stopped, the
// RX pools are drained, host interrupts are switched off, the IRQ is
// freed, the PCI device is disabled, the queues are destroyed, the ATM
// device is deregistered and the PCI region is released.
2278 static void amb_remove_one(struct pci_dev *pci_dev)
2280 struct amb_dev *dev;
// fetch the amb_dev stored by setup_dev
2282 dev = pci_get_drvdata(pci_dev);
2284 PRINTD(DBG_INFO|DBG_INIT, "closing %p (atm_dev = %p)", dev, dev->atm_dev);
// wait for a running housekeeping callback to finish before going on
2285 del_timer_sync(&dev->housekeeping);
2286 // the drain should not be necessary
2287 drain_rx_pools(dev);
2288 interrupts_off(dev);
2290 free_irq(dev->irq, dev);
2291 pci_disable_device(pci_dev);
2292 destroy_queues(dev);
2293 atm_dev_deregister(dev->atm_dev);
2295 pci_release_region(pci_dev, 1);
// Sanity-check the module parameters at load time. Queue sizes below
// MIN_QUEUE_SIZE and rx_lats below MIN_RX_BUFFERS are raised to the
// minimum (each change is logged), and RX pools whose buffer size does not
// exceed the largest size seen so far are reported as useless.
2298 static void __init amb_check_args (void) {
2300 unsigned int max_rx_size;
2302 #ifdef DEBUG_AMBASSADOR
// debug is masked down to the known bits as part of the log call
2303 PRINTK (KERN_NOTICE, "debug bitmap is %hx", debug &= DBG_MASK);
2306 PRINTK (KERN_NOTICE, "no debugging support");
// the assignments below happen inside the PRINTK argument lists
2309 if (cmds < MIN_QUEUE_SIZE)
2310 PRINTK (KERN_NOTICE, "cmds has been raised to %u",
2311 cmds = MIN_QUEUE_SIZE);
2313 if (txs < MIN_QUEUE_SIZE)
2314 PRINTK (KERN_NOTICE, "txs has been raised to %u",
2315 txs = MIN_QUEUE_SIZE);
2317 for (pool = 0; pool < NUM_RX_POOLS; ++pool)
2318 if (rxs[pool] < MIN_QUEUE_SIZE)
2319 PRINTK (KERN_NOTICE, "rxs[%hu] has been raised to %u",
2320 pool, rxs[pool] = MIN_QUEUE_SIZE);
2322 // buffers sizes should be greater than zero and strictly increasing
2324 for (pool = 0; pool < NUM_RX_POOLS; ++pool)
2325 if (rxs_bs[pool] <= max_rx_size)
2326 PRINTK (KERN_NOTICE, "useless pool (rxs_bs[%hu] = %u)",
2327 pool, rxs_bs[pool]);
// otherwise this pool becomes the new largest size
2329 max_rx_size = rxs_bs[pool];
2331 if (rx_lats < MIN_RX_BUFFERS)
2332 PRINTK (KERN_NOTICE, "rx_lats has been raised to %u",
2333 rx_lats = MIN_RX_BUFFERS);
2338 /********** module stuff **********/
// Module metadata, the firmware file the driver needs, and the module
// parameters with their descriptions. Only debug is given a non-zero
// permission mode (0644); all other parameters use mode 0.
2340 MODULE_AUTHOR(maintainer_string);
2341 MODULE_DESCRIPTION(description_string);
2342 MODULE_LICENSE("GPL");
2343 MODULE_FIRMWARE("atmsar11.fw");
2344 module_param(debug, ushort, 0644);
2345 module_param(cmds, uint, 0);
2346 module_param(txs, uint, 0);
2347 module_param_array(rxs, uint, NULL, 0);
2348 module_param_array(rxs_bs, uint, NULL, 0);
2349 module_param(rx_lats, uint, 0);
2350 module_param(pci_lat, byte, 0);
2351 MODULE_PARM_DESC(debug, "debug bitmap, see .h file");
2352 MODULE_PARM_DESC(cmds, "number of command queue entries");
2353 MODULE_PARM_DESC(txs, "number of TX queue entries");
2354 MODULE_PARM_DESC(rxs, "number of RX queue entries [" __MODULE_STRING(NUM_RX_POOLS) "]");
2355 MODULE_PARM_DESC(rxs_bs, "size of RX buffers [" __MODULE_STRING(NUM_RX_POOLS) "]");
2356 MODULE_PARM_DESC(rx_lats, "number of extra buffers to cope with RX latencies");
2357 MODULE_PARM_DESC(pci_lat, "PCI latency in bus cycles");
2359 /********** module entry **********/
// PCI IDs handled by this driver. The BAD device ID is listed as well so
// that amb_probe sees such cards and can reject them with a message.
2361 static const struct pci_device_id amb_pci_tbl[] = {
2362 { PCI_VDEVICE(MADGE, PCI_DEVICE_ID_MADGE_AMBASSADOR), 0 },
2363 { PCI_VDEVICE(MADGE, PCI_DEVICE_ID_MADGE_AMBASSADOR_BAD), 0 },
2367 MODULE_DEVICE_TABLE(pci, amb_pci_tbl);
// PCI driver description registered in amb_module_init and unregistered in
// amb_module_exit.
2369 static struct pci_driver amb_driver = {
2372 .remove = amb_remove_one,
2373 .id_table = amb_pci_tbl,
// Module entry point: registers the PCI driver and returns the result of
// pci_register_driver.
2376 static int __init amb_module_init (void)
2378 PRINTD (DBG_FLOW|DBG_INIT, "init_module");
// compile-time check that amb_mem has the expected size of 4*16 + 4*12 bytes
2380 BUILD_BUG_ON(sizeof(amb_mem) != 4*16 + 4*12);
2387 return pci_register_driver(&amb_driver);
2390 /********** module exit **********/
// Module exit point: unregisters the PCI driver.
2392 static void __exit amb_module_exit (void)
2394 PRINTD (DBG_FLOW|DBG_INIT, "cleanup_module");
2396 pci_unregister_driver(&amb_driver);
// Hook the entry and exit functions into the module loader.
2399 module_init(amb_module_init);
2400 module_exit(amb_module_exit);