linux/drivers/atm/ambassador.c
<<
>>
Prefs
   1/*
   2  Madge Ambassador ATM Adapter driver.
   3  Copyright (C) 1995-1999  Madge Networks Ltd.
   4
   5  This program is free software; you can redistribute it and/or modify
   6  it under the terms of the GNU General Public License as published by
   7  the Free Software Foundation; either version 2 of the License, or
   8  (at your option) any later version.
   9
  10  This program is distributed in the hope that it will be useful,
  11  but WITHOUT ANY WARRANTY; without even the implied warranty of
  12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13  GNU General Public License for more details.
  14
  15  You should have received a copy of the GNU General Public License
  16  along with this program; if not, write to the Free Software
  17  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  18
  19  The GNU GPL is contained in /usr/doc/copyright/GPL on a Debian
  20  system and in the file COPYING in the Linux kernel source.
  21*/
  22
  23/* * dedicated to the memory of Graham Gordon 1971-1998 * */
  24
  25#include <linux/module.h>
  26#include <linux/types.h>
  27#include <linux/pci.h>
  28#include <linux/kernel.h>
  29#include <linux/init.h>
  30#include <linux/ioport.h>
  31#include <linux/atmdev.h>
  32#include <linux/delay.h>
  33#include <linux/interrupt.h>
  34#include <linux/poison.h>
  35#include <linux/bitrev.h>
  36#include <linux/mutex.h>
  37#include <linux/firmware.h>
  38#include <linux/ihex.h>
  39#include <linux/slab.h>
  40
  41#include <linux/atomic.h>
  42#include <asm/io.h>
  43#include <asm/byteorder.h>
  44
  45#include "ambassador.h"
  46
  47#define maintainer_string "Giuliano Procida at Madge Networks <gprocida@madge.com>"
  48#define description_string "Madge ATM Ambassador driver"
  49#define version_string "1.2.4"
  50
  51static inline void __init show_version (void) {
  52  printk ("%s version %s\n", description_string, version_string);
  53}
  54
  55/*
  56  
  57  Theory of Operation
  58  
  59  I Hardware, detection, initialisation and shutdown.
  60  
  61  1. Supported Hardware
  62  
  63  This driver is for the PCI ATMizer-based Ambassador card (except
  64  very early versions). It is not suitable for the similar EISA "TR7"
  65  card. Commercially, both cards are known as Collage Server ATM
  66  adapters.
  67  
   68  The loader supports image transfer to the card, image start and a few
  69  other miscellaneous commands.
  70  
  71  Only AAL5 is supported with vpi = 0 and vci in the range 0 to 1023.
  72  
  73  The cards are big-endian.
  74  
  75  2. Detection
  76  
  77  Standard PCI stuff, the early cards are detected and rejected.
  78  
  79  3. Initialisation
  80  
  81  The cards are reset and the self-test results are checked. The
  82  microcode image is then transferred and started. This waits for a
  83  pointer to a descriptor containing details of the host-based queues
  84  and buffers and various parameters etc. Once they are processed
  85  normal operations may begin. The BIA is read using a microcode
  86  command.
  87  
  88  4. Shutdown
  89  
  90  This may be accomplished either by a card reset or via the microcode
  91  shutdown command. Further investigation required.
  92  
  93  5. Persistent state
  94  
  95  The card reset does not affect PCI configuration (good) or the
  96  contents of several other "shared run-time registers" (bad) which
  97  include doorbell and interrupt control as well as EEPROM and PCI
  98  control. The driver must be careful when modifying these registers
  99  not to touch bits it does not use and to undo any changes at exit.
 100  
 101  II Driver software
 102  
 103  0. Generalities
 104  
 105  The adapter is quite intelligent (fast) and has a simple interface
 106  (few features). VPI is always zero, 1024 VCIs are supported. There
 107  is limited cell rate support. UBR channels can be capped and ABR
 108  (explicit rate, but not EFCI) is supported. There is no CBR or VBR
 109  support.
 110  
 111  1. Driver <-> Adapter Communication
 112  
 113  Apart from the basic loader commands, the driver communicates
 114  through three entities: the command queue (CQ), the transmit queue
 115  pair (TXQ) and the receive queue pairs (RXQ). These three entities
 116  are set up by the host and passed to the microcode just after it has
 117  been started.
 118  
 119  All queues are host-based circular queues. They are contiguous and
 120  (due to hardware limitations) have some restrictions as to their
 121  locations in (bus) memory. They are of the "full means the same as
 122  empty so don't do that" variety since the adapter uses pointers
 123  internally.
 124  
 125  The queue pairs work as follows: one queue is for supply to the
 126  adapter, items in it are pending and are owned by the adapter; the
 127  other is the queue for return from the adapter, items in it have
 128  been dealt with by the adapter. The host adds items to the supply
 129  (TX descriptors and free RX buffer descriptors) and removes items
 130  from the return (TX and RX completions). The adapter deals with out
 131  of order completions.
 132  
 133  Interrupts (card to host) and the doorbell (host to card) are used
 134  for signalling.
 135  
 136  1. CQ
 137  
 138  This is to communicate "open VC", "close VC", "get stats" etc. to
 139  the adapter. At most one command is retired every millisecond by the
 140  card. There is no out of order completion or notification. The
 141  driver needs to check the return code of the command, waiting as
 142  appropriate.
 143  
 144  2. TXQ
 145  
 146  TX supply items are of variable length (scatter gather support) and
 147  so the queue items are (more or less) pointers to the real thing.
 148  Each TX supply item contains a unique, host-supplied handle (the skb
 149  bus address seems most sensible as this works for Alphas as well,
 150  there is no need to do any endian conversions on the handles).
 151  
 152  TX return items consist of just the handles above.
 153  
 154  3. RXQ (up to 4 of these with different lengths and buffer sizes)
 155  
 156  RX supply items consist of a unique, host-supplied handle (the skb
 157  bus address again) and a pointer to the buffer data area.
 158  
 159  RX return items consist of the handle above, the VC, length and a
 160  status word. This just screams "oh so easy" doesn't it?
 161
 162  Note on RX pool sizes:
 163   
 164  Each pool should have enough buffers to handle a back-to-back stream
 165  of minimum sized frames on a single VC. For example:
 166  
 167    frame spacing = 3us (about right)
 168    
 169    delay = IRQ lat + RX handling + RX buffer replenish = 20 (us)  (a guess)
 170    
 171    min number of buffers for one VC = 1 + delay/spacing (buffers)
 172
 173    delay/spacing = latency = (20+2)/3 = 7 (buffers)  (rounding up)
 174    
 175  The 20us delay assumes that there is no need to sleep; if we need to
 176  sleep to get buffers we are going to drop frames anyway.
 177  
 178  In fact, each pool should have enough buffers to support the
 179  simultaneous reassembly of a separate frame on each VC and cope with
 180  the case in which frames complete in round robin cell fashion on
 181  each VC.
 182  
 183  Only one frame can complete at each cell arrival, so if "n" VCs are
 184  open, the worst case is to have them all complete frames together
 185  followed by all starting new frames together.
 186  
 187    desired number of buffers = n + delay/spacing
 188    
 189  These are the extreme requirements, however, they are "n+k" for some
 190  "k" so we have only the constant to choose. This is the argument
  191  rx_lats which currently defaults to 7.
 192  
 193  Actually, "n ? n+k : 0" is better and this is what is implemented,
 194  subject to the limit given by the pool size.
 195  
 196  4. Driver locking
 197  
 198  Simple spinlocks are used around the TX and RX queue mechanisms.
 199  Anyone with a faster, working method is welcome to implement it.
 200  
 201  The adapter command queue is protected with a spinlock. We always
 202  wait for commands to complete.
 203  
 204  A more complex form of locking is used around parts of the VC open
 205  and close functions. There are three reasons for a lock: 1. we need
 206  to do atomic rate reservation and release (not used yet), 2. Opening
 207  sometimes involves two adapter commands which must not be separated
 208  by another command on the same VC, 3. the changes to RX pool size
 209  must be atomic. The lock needs to work over context switches, so we
 210  use a semaphore.
 211  
 212  III Hardware Features and Microcode Bugs
 213  
 214  1. Byte Ordering
 215  
 216  *%^"$&%^$*&^"$(%^$#&^%$(&#%$*(&^#%!"!"!*!
 217  
 218  2. Memory access
 219  
 220  All structures that are not accessed using DMA must be 4-byte
 221  aligned (not a problem) and must not cross 4MB boundaries.
 222  
 223  There is a DMA memory hole at E0000000-E00000FF (groan).
 224  
 225  TX fragments (DMA read) must not cross 4MB boundaries (would be 16MB
 226  but for a hardware bug).
 227  
 228  RX buffers (DMA write) must not cross 16MB boundaries and must
 229  include spare trailing bytes up to the next 4-byte boundary; they
 230  will be written with rubbish.
 231  
 232  The PLX likes to prefetch; if reading up to 4 u32 past the end of
 233  each TX fragment is not a problem, then TX can be made to go a
 234  little faster by passing a flag at init that disables a prefetch
 235  workaround. We do not pass this flag. (new microcode only)
 236  
 237  Now we:
 238  . Note that alloc_skb rounds up size to a 16byte boundary.  
 239  . Ensure all areas do not traverse 4MB boundaries.
 240  . Ensure all areas do not start at a E00000xx bus address.
 241  (I cannot be certain, but this may always hold with Linux)
 242  . Make all failures cause a loud message.
 243  . Discard non-conforming SKBs (causes TX failure or RX fill delay).
 244  . Discard non-conforming TX fragment descriptors (the TX fails).
 245  In the future we could:
 246  . Allow RX areas that traverse 4MB (but not 16MB) boundaries.
 247  . Segment TX areas into some/more fragments, when necessary.
 248  . Relax checks for non-DMA items (ignore hole).
 249  . Give scatter-gather (iovec) requirements using ???. (?)
 250  
 251  3. VC close is broken (only for new microcode)
 252  
 253  The VC close adapter microcode command fails to do anything if any
 254  frames have been received on the VC but none have been transmitted.
 255  Frames continue to be reassembled and passed (with IRQ) to the
 256  driver.
 257  
 258  IV To Do List
 259  
 260  . Fix bugs!
 261  
 262  . Timer code may be broken.
 263  
 264  . Deal with buggy VC close (somehow) in microcode 12.
 265  
 266  . Handle interrupted and/or non-blocking writes - is this a job for
 267    the protocol layer?
 268  
 269  . Add code to break up TX fragments when they span 4MB boundaries.
 270  
 271  . Add SUNI phy layer (need to know where SUNI lives on card).
 272  
 273  . Implement a tx_alloc fn to (a) satisfy TX alignment etc. and (b)
 274    leave extra headroom space for Ambassador TX descriptors.
 275  
 276  . Understand these elements of struct atm_vcc: recvq (proto?),
 277    sleep, callback, listenq, backlog_quota, reply and user_back.
 278  
 279  . Adjust TX/RX skb allocation to favour IP with LANE/CLIP (configurable).
 280  
 281  . Impose a TX-pending limit (2?) on each VC, help avoid TX q overflow.
 282  
 283  . Decide whether RX buffer recycling is or can be made completely safe;
 284    turn it back on. It looks like Werner is going to axe this.
 285  
 286  . Implement QoS changes on open VCs (involves extracting parts of VC open
 287    and close into separate functions and using them to make changes).
 288  
 289  . Hack on command queue so that someone can issue multiple commands and wait
 290    on the last one (OR only "no-op" or "wait" commands are waited for).
 291  
 292  . Eliminate need for while-schedule around do_command.
 293  
 294*/
 295
 296static void do_housekeeping (unsigned long arg);
 297/********** globals **********/
 298
 299static unsigned short debug = 0;
 300static unsigned int cmds = 8;
 301static unsigned int txs = 32;
 302static unsigned int rxs[NUM_RX_POOLS] = { 64, 64, 64, 64 };
 303static unsigned int rxs_bs[NUM_RX_POOLS] = { 4080, 12240, 36720, 65535 };
 304static unsigned int rx_lats = 7;
 305static unsigned char pci_lat = 0;
 306
 307static const unsigned long onegigmask = -1 << 30;
 308
 309/********** access to adapter **********/
 310
 311static inline void wr_plain (const amb_dev * dev, size_t addr, u32 data) {
 312  PRINTD (DBG_FLOW|DBG_REGS, "wr: %08zx <- %08x", addr, data);
 313#ifdef AMB_MMIO
 314  dev->membase[addr / sizeof(u32)] = data;
 315#else
 316  outl (data, dev->iobase + addr);
 317#endif
 318}
 319
 320static inline u32 rd_plain (const amb_dev * dev, size_t addr) {
 321#ifdef AMB_MMIO
 322  u32 data = dev->membase[addr / sizeof(u32)];
 323#else
 324  u32 data = inl (dev->iobase + addr);
 325#endif
 326  PRINTD (DBG_FLOW|DBG_REGS, "rd: %08zx -> %08x", addr, data);
 327  return data;
 328}
 329
 330static inline void wr_mem (const amb_dev * dev, size_t addr, u32 data) {
 331  __be32 be = cpu_to_be32 (data);
 332  PRINTD (DBG_FLOW|DBG_REGS, "wr: %08zx <- %08x b[%08x]", addr, data, be);
 333#ifdef AMB_MMIO
 334  dev->membase[addr / sizeof(u32)] = be;
 335#else
 336  outl (be, dev->iobase + addr);
 337#endif
 338}
 339
 340static inline u32 rd_mem (const amb_dev * dev, size_t addr) {
 341#ifdef AMB_MMIO
 342  __be32 be = dev->membase[addr / sizeof(u32)];
 343#else
 344  __be32 be = inl (dev->iobase + addr);
 345#endif
 346  u32 data = be32_to_cpu (be);
 347  PRINTD (DBG_FLOW|DBG_REGS, "rd: %08zx -> %08x b[%08x]", addr, data, be);
 348  return data;
 349}
 350
 351/********** dump routines **********/
 352
 353static inline void dump_registers (const amb_dev * dev) {
 354#ifdef DEBUG_AMBASSADOR
 355  if (debug & DBG_REGS) {
 356    size_t i;
 357    PRINTD (DBG_REGS, "reading PLX control: ");
 358    for (i = 0x00; i < 0x30; i += sizeof(u32))
 359      rd_mem (dev, i);
 360    PRINTD (DBG_REGS, "reading mailboxes: ");
 361    for (i = 0x40; i < 0x60; i += sizeof(u32))
 362      rd_mem (dev, i);
 363    PRINTD (DBG_REGS, "reading doorb irqev irqen reset:");
 364    for (i = 0x60; i < 0x70; i += sizeof(u32))
 365      rd_mem (dev, i);
 366  }
 367#else
 368  (void) dev;
 369#endif
 370  return;
 371}
 372
 373static inline void dump_loader_block (volatile loader_block * lb) {
 374#ifdef DEBUG_AMBASSADOR
 375  unsigned int i;
 376  PRINTDB (DBG_LOAD, "lb @ %p; res: %d, cmd: %d, pay:",
 377           lb, be32_to_cpu (lb->result), be32_to_cpu (lb->command));
 378  for (i = 0; i < MAX_COMMAND_DATA; ++i)
 379    PRINTDM (DBG_LOAD, " %08x", be32_to_cpu (lb->payload.data[i]));
 380  PRINTDE (DBG_LOAD, ", vld: %08x", be32_to_cpu (lb->valid));
 381#else
 382  (void) lb;
 383#endif
 384  return;
 385}
 386
 387static inline void dump_command (command * cmd) {
 388#ifdef DEBUG_AMBASSADOR
 389  unsigned int i;
 390  PRINTDB (DBG_CMD, "cmd @ %p, req: %08x, pars:",
 391           cmd, /*be32_to_cpu*/ (cmd->request));
 392  for (i = 0; i < 3; ++i)
 393    PRINTDM (DBG_CMD, " %08x", /*be32_to_cpu*/ (cmd->args.par[i]));
 394  PRINTDE (DBG_CMD, "");
 395#else
 396  (void) cmd;
 397#endif
 398  return;
 399}
 400
 401static inline void dump_skb (char * prefix, unsigned int vc, struct sk_buff * skb) {
 402#ifdef DEBUG_AMBASSADOR
 403  unsigned int i;
 404  unsigned char * data = skb->data;
 405  PRINTDB (DBG_DATA, "%s(%u) ", prefix, vc);
 406  for (i=0; i<skb->len && i < 256;i++)
 407    PRINTDM (DBG_DATA, "%02x ", data[i]);
 408  PRINTDE (DBG_DATA,"");
 409#else
 410  (void) prefix;
 411  (void) vc;
 412  (void) skb;
 413#endif
 414  return;
 415}
 416
 417/********** check memory areas for use by Ambassador **********/
 418
 419/* see limitations under Hardware Features */
 420
 421static int check_area (void * start, size_t length) {
 422  // assumes length > 0
 423  const u32 fourmegmask = -1 << 22;
 424  const u32 twofivesixmask = -1 << 8;
 425  const u32 starthole = 0xE0000000;
 426  u32 startaddress = virt_to_bus (start);
 427  u32 lastaddress = startaddress+length-1;
 428  if ((startaddress ^ lastaddress) & fourmegmask ||
 429      (startaddress & twofivesixmask) == starthole) {
 430    PRINTK (KERN_ERR, "check_area failure: [%x,%x] - mail maintainer!",
 431            startaddress, lastaddress);
 432    return -1;
 433  } else {
 434    return 0;
 435  }
 436}
 437
 438/********** free an skb (as per ATM device driver documentation) **********/
 439
 440static void amb_kfree_skb (struct sk_buff * skb) {
 441  if (ATM_SKB(skb)->vcc->pop) {
 442    ATM_SKB(skb)->vcc->pop (ATM_SKB(skb)->vcc, skb);
 443  } else {
 444    dev_kfree_skb_any (skb);
 445  }
 446}
 447
 448/********** TX completion **********/
 449
 450static void tx_complete (amb_dev * dev, tx_out * tx) {
 451  tx_simple * tx_descr = bus_to_virt (tx->handle);
 452  struct sk_buff * skb = tx_descr->skb;
 453  
 454  PRINTD (DBG_FLOW|DBG_TX, "tx_complete %p %p", dev, tx);
 455  
 456  // VC layer stats
 457  atomic_inc(&ATM_SKB(skb)->vcc->stats->tx);
 458  
 459  // free the descriptor
 460  kfree (tx_descr);
 461  
 462  // free the skb
 463  amb_kfree_skb (skb);
 464  
 465  dev->stats.tx_ok++;
 466  return;
 467}
 468
 469/********** RX completion **********/
 470
 471static void rx_complete (amb_dev * dev, rx_out * rx) {
 472  struct sk_buff * skb = bus_to_virt (rx->handle);
 473  u16 vc = be16_to_cpu (rx->vc);
 474  // unused: u16 lec_id = be16_to_cpu (rx->lec_id);
 475  u16 status = be16_to_cpu (rx->status);
 476  u16 rx_len = be16_to_cpu (rx->length);
 477  
 478  PRINTD (DBG_FLOW|DBG_RX, "rx_complete %p %p (len=%hu)", dev, rx, rx_len);
 479  
 480  // XXX move this in and add to VC stats ???
 481  if (!status) {
 482    struct atm_vcc * atm_vcc = dev->rxer[vc];
 483    dev->stats.rx.ok++;
 484    
 485    if (atm_vcc) {
 486      
 487      if (rx_len <= atm_vcc->qos.rxtp.max_sdu) {
 488        
 489        if (atm_charge (atm_vcc, skb->truesize)) {
 490          
 491          // prepare socket buffer
 492          ATM_SKB(skb)->vcc = atm_vcc;
 493          skb_put (skb, rx_len);
 494          
 495          dump_skb ("<<<", vc, skb);
 496          
 497          // VC layer stats
 498          atomic_inc(&atm_vcc->stats->rx);
 499          __net_timestamp(skb);
 500          // end of our responsibility
 501          atm_vcc->push (atm_vcc, skb);
 502          return;
 503          
 504        } else {
 505          // someone fix this (message), please!
 506          PRINTD (DBG_INFO|DBG_RX, "dropped thanks to atm_charge (vc %hu, truesize %u)", vc, skb->truesize);
 507          // drop stats incremented in atm_charge
 508        }
 509        
 510      } else {
 511        PRINTK (KERN_INFO, "dropped over-size frame");
 512        // should we count this?
 513        atomic_inc(&atm_vcc->stats->rx_drop);
 514      }
 515      
 516    } else {
 517      PRINTD (DBG_WARN|DBG_RX, "got frame but RX closed for channel %hu", vc);
 518      // this is an adapter bug, only in new version of microcode
 519    }
 520    
 521  } else {
 522    dev->stats.rx.error++;
 523    if (status & CRC_ERR)
 524      dev->stats.rx.badcrc++;
 525    if (status & LEN_ERR)
 526      dev->stats.rx.toolong++;
 527    if (status & ABORT_ERR)
 528      dev->stats.rx.aborted++;
 529    if (status & UNUSED_ERR)
 530      dev->stats.rx.unused++;
 531  }
 532  
 533  dev_kfree_skb_any (skb);
 534  return;
 535}
 536
 537/*
 538  
 539  Note on queue handling.
 540  
 541  Here "give" and "take" refer to queue entries and a queue (pair)
 542  rather than frames to or from the host or adapter. Empty frame
 543  buffers are given to the RX queue pair and returned unused or
 544  containing RX frames. TX frames (well, pointers to TX fragment
 545  lists) are given to the TX queue pair, completions are returned.
 546  
 547*/
 548
 549/********** command queue **********/
 550
 551// I really don't like this, but it's the best I can do at the moment
 552
 553// also, the callers are responsible for byte order as the microcode
 554// sometimes does 16-bit accesses (yuk yuk yuk)
 555
 556static int command_do (amb_dev * dev, command * cmd) {
 557  amb_cq * cq = &dev->cq;
 558  volatile amb_cq_ptrs * ptrs = &cq->ptrs;
 559  command * my_slot;
 560  
 561  PRINTD (DBG_FLOW|DBG_CMD, "command_do %p", dev);
 562  
 563  if (test_bit (dead, &dev->flags))
 564    return 0;
 565  
 566  spin_lock (&cq->lock);
 567  
 568  // if not full...
 569  if (cq->pending < cq->maximum) {
 570    // remember my slot for later
 571    my_slot = ptrs->in;
 572    PRINTD (DBG_CMD, "command in slot %p", my_slot);
 573    
 574    dump_command (cmd);
 575    
 576    // copy command in
 577    *ptrs->in = *cmd;
 578    cq->pending++;
 579    ptrs->in = NEXTQ (ptrs->in, ptrs->start, ptrs->limit);
 580    
 581    // mail the command
 582    wr_mem (dev, offsetof(amb_mem, mb.adapter.cmd_address), virt_to_bus (ptrs->in));
 583    
 584    if (cq->pending > cq->high)
 585      cq->high = cq->pending;
 586    spin_unlock (&cq->lock);
 587    
 588    // these comments were in a while-loop before, msleep removes the loop
 589    // go to sleep
 590    // PRINTD (DBG_CMD, "wait: sleeping %lu for command", timeout);
 591    msleep(cq->pending);
 592    
 593    // wait for my slot to be reached (all waiters are here or above, until...)
 594    while (ptrs->out != my_slot) {
 595      PRINTD (DBG_CMD, "wait: command slot (now at %p)", ptrs->out);
 596      set_current_state(TASK_UNINTERRUPTIBLE);
 597      schedule();
 598    }
 599    
 600    // wait on my slot (... one gets to its slot, and... )
 601    while (ptrs->out->request != cpu_to_be32 (SRB_COMPLETE)) {
 602      PRINTD (DBG_CMD, "wait: command slot completion");
 603      set_current_state(TASK_UNINTERRUPTIBLE);
 604      schedule();
 605    }
 606    
 607    PRINTD (DBG_CMD, "command complete");
 608    // update queue (... moves the queue along to the next slot)
 609    spin_lock (&cq->lock);
 610    cq->pending--;
 611    // copy command out
 612    *cmd = *ptrs->out;
 613    ptrs->out = NEXTQ (ptrs->out, ptrs->start, ptrs->limit);
 614    spin_unlock (&cq->lock);
 615    
 616    return 0;
 617  } else {
 618    cq->filled++;
 619    spin_unlock (&cq->lock);
 620    return -EAGAIN;
 621  }
 622  
 623}
 624
 625/********** TX queue pair **********/
 626
 627static int tx_give (amb_dev * dev, tx_in * tx) {
 628  amb_txq * txq = &dev->txq;
 629  unsigned long flags;
 630  
 631  PRINTD (DBG_FLOW|DBG_TX, "tx_give %p", dev);
 632
 633  if (test_bit (dead, &dev->flags))
 634    return 0;
 635  
 636  spin_lock_irqsave (&txq->lock, flags);
 637  
 638  if (txq->pending < txq->maximum) {
 639    PRINTD (DBG_TX, "TX in slot %p", txq->in.ptr);
 640
 641    *txq->in.ptr = *tx;
 642    txq->pending++;
 643    txq->in.ptr = NEXTQ (txq->in.ptr, txq->in.start, txq->in.limit);
 644    // hand over the TX and ring the bell
 645    wr_mem (dev, offsetof(amb_mem, mb.adapter.tx_address), virt_to_bus (txq->in.ptr));
 646    wr_mem (dev, offsetof(amb_mem, doorbell), TX_FRAME);
 647    
 648    if (txq->pending > txq->high)
 649      txq->high = txq->pending;
 650    spin_unlock_irqrestore (&txq->lock, flags);
 651    return 0;
 652  } else {
 653    txq->filled++;
 654    spin_unlock_irqrestore (&txq->lock, flags);
 655    return -EAGAIN;
 656  }
 657}
 658
 659static int tx_take (amb_dev * dev) {
 660  amb_txq * txq = &dev->txq;
 661  unsigned long flags;
 662  
 663  PRINTD (DBG_FLOW|DBG_TX, "tx_take %p", dev);
 664  
 665  spin_lock_irqsave (&txq->lock, flags);
 666  
 667  if (txq->pending && txq->out.ptr->handle) {
 668    // deal with TX completion
 669    tx_complete (dev, txq->out.ptr);
 670    // mark unused again
 671    txq->out.ptr->handle = 0;
 672    // remove item
 673    txq->pending--;
 674    txq->out.ptr = NEXTQ (txq->out.ptr, txq->out.start, txq->out.limit);
 675    
 676    spin_unlock_irqrestore (&txq->lock, flags);
 677    return 0;
 678  } else {
 679    
 680    spin_unlock_irqrestore (&txq->lock, flags);
 681    return -1;
 682  }
 683}
 684
 685/********** RX queue pairs **********/
 686
 687static int rx_give (amb_dev * dev, rx_in * rx, unsigned char pool) {
 688  amb_rxq * rxq = &dev->rxq[pool];
 689  unsigned long flags;
 690  
 691  PRINTD (DBG_FLOW|DBG_RX, "rx_give %p[%hu]", dev, pool);
 692  
 693  spin_lock_irqsave (&rxq->lock, flags);
 694  
 695  if (rxq->pending < rxq->maximum) {
 696    PRINTD (DBG_RX, "RX in slot %p", rxq->in.ptr);
 697
 698    *rxq->in.ptr = *rx;
 699    rxq->pending++;
 700    rxq->in.ptr = NEXTQ (rxq->in.ptr, rxq->in.start, rxq->in.limit);
 701    // hand over the RX buffer
 702    wr_mem (dev, offsetof(amb_mem, mb.adapter.rx_address[pool]), virt_to_bus (rxq->in.ptr));
 703    
 704    spin_unlock_irqrestore (&rxq->lock, flags);
 705    return 0;
 706  } else {
 707    spin_unlock_irqrestore (&rxq->lock, flags);
 708    return -1;
 709  }
 710}
 711
 712static int rx_take (amb_dev * dev, unsigned char pool) {
 713  amb_rxq * rxq = &dev->rxq[pool];
 714  unsigned long flags;
 715  
 716  PRINTD (DBG_FLOW|DBG_RX, "rx_take %p[%hu]", dev, pool);
 717  
 718  spin_lock_irqsave (&rxq->lock, flags);
 719  
 720  if (rxq->pending && (rxq->out.ptr->status || rxq->out.ptr->length)) {
 721    // deal with RX completion
 722    rx_complete (dev, rxq->out.ptr);
 723    // mark unused again
 724    rxq->out.ptr->status = 0;
 725    rxq->out.ptr->length = 0;
 726    // remove item
 727    rxq->pending--;
 728    rxq->out.ptr = NEXTQ (rxq->out.ptr, rxq->out.start, rxq->out.limit);
 729    
 730    if (rxq->pending < rxq->low)
 731      rxq->low = rxq->pending;
 732    spin_unlock_irqrestore (&rxq->lock, flags);
 733    return 0;
 734  } else {
 735    if (!rxq->pending && rxq->buffers_wanted)
 736      rxq->emptied++;
 737    spin_unlock_irqrestore (&rxq->lock, flags);
 738    return -1;
 739  }
 740}
 741
 742/********** RX Pool handling **********/
 743
 744/* pre: buffers_wanted = 0, post: pending = 0 */
 745static void drain_rx_pool (amb_dev * dev, unsigned char pool) {
 746  amb_rxq * rxq = &dev->rxq[pool];
 747  
 748  PRINTD (DBG_FLOW|DBG_POOL, "drain_rx_pool %p %hu", dev, pool);
 749  
 750  if (test_bit (dead, &dev->flags))
 751    return;
 752  
 753  /* we are not quite like the fill pool routines as we cannot just
 754     remove one buffer, we have to remove all of them, but we might as
 755     well pretend... */
 756  if (rxq->pending > rxq->buffers_wanted) {
 757    command cmd;
 758    cmd.request = cpu_to_be32 (SRB_FLUSH_BUFFER_Q);
 759    cmd.args.flush.flags = cpu_to_be32 (pool << SRB_POOL_SHIFT);
 760    while (command_do (dev, &cmd))
 761      schedule();
 762    /* the pool may also be emptied via the interrupt handler */
 763    while (rxq->pending > rxq->buffers_wanted)
 764      if (rx_take (dev, pool))
 765        schedule();
 766  }
 767  
 768  return;
 769}
 770
 771static void drain_rx_pools (amb_dev * dev) {
 772  unsigned char pool;
 773  
 774  PRINTD (DBG_FLOW|DBG_POOL, "drain_rx_pools %p", dev);
 775  
 776  for (pool = 0; pool < NUM_RX_POOLS; ++pool)
 777    drain_rx_pool (dev, pool);
 778}
 779
 780static void fill_rx_pool (amb_dev * dev, unsigned char pool,
 781                                 gfp_t priority)
 782{
 783  rx_in rx;
 784  amb_rxq * rxq;
 785  
 786  PRINTD (DBG_FLOW|DBG_POOL, "fill_rx_pool %p %hu %x", dev, pool, priority);
 787  
 788  if (test_bit (dead, &dev->flags))
 789    return;
 790  
 791  rxq = &dev->rxq[pool];
 792  while (rxq->pending < rxq->maximum && rxq->pending < rxq->buffers_wanted) {
 793    
 794    struct sk_buff * skb = alloc_skb (rxq->buffer_size, priority);
 795    if (!skb) {
 796      PRINTD (DBG_SKB|DBG_POOL, "failed to allocate skb for RX pool %hu", pool);
 797      return;
 798    }
 799    if (check_area (skb->data, skb->truesize)) {
 800      dev_kfree_skb_any (skb);
 801      return;
 802    }
 803    // cast needed as there is no %? for pointer differences
 804    PRINTD (DBG_SKB, "allocated skb at %p, head %p, area %li",
 805            skb, skb->head, (long) skb_end_offset(skb));
 806    rx.handle = virt_to_bus (skb);
 807    rx.host_address = cpu_to_be32 (virt_to_bus (skb->data));
 808    if (rx_give (dev, &rx, pool))
 809      dev_kfree_skb_any (skb);
 810    
 811  }
 812  
 813  return;
 814}
 815
 816// top up all RX pools
 817static void fill_rx_pools (amb_dev * dev) {
 818  unsigned char pool;
 819  
 820  PRINTD (DBG_FLOW|DBG_POOL, "fill_rx_pools %p", dev);
 821  
 822  for (pool = 0; pool < NUM_RX_POOLS; ++pool)
 823    fill_rx_pool (dev, pool, GFP_ATOMIC);
 824  
 825  return;
 826}
 827
 828/********** enable host interrupts **********/
 829
 830static void interrupts_on (amb_dev * dev) {
 831  wr_plain (dev, offsetof(amb_mem, interrupt_control),
 832            rd_plain (dev, offsetof(amb_mem, interrupt_control))
 833            | AMB_INTERRUPT_BITS);
 834}
 835
 836/********** disable host interrupts **********/
 837
 838static void interrupts_off (amb_dev * dev) {
 839  wr_plain (dev, offsetof(amb_mem, interrupt_control),
 840            rd_plain (dev, offsetof(amb_mem, interrupt_control))
 841            &~ AMB_INTERRUPT_BITS);
 842}
 843
 844/********** interrupt handling **********/
 845
 846static irqreturn_t interrupt_handler(int irq, void *dev_id) {
 847  amb_dev * dev = dev_id;
 848  
 849  PRINTD (DBG_IRQ|DBG_FLOW, "interrupt_handler: %p", dev_id);
 850  
 851  {
 852    u32 interrupt = rd_plain (dev, offsetof(amb_mem, interrupt));
 853  
 854    // for us or someone else sharing the same interrupt
 855    if (!interrupt) {
 856      PRINTD (DBG_IRQ, "irq not for me: %d", irq);
 857      return IRQ_NONE;
 858    }
 859    
 860    // definitely for us
 861    PRINTD (DBG_IRQ, "FYI: interrupt was %08x", interrupt);
 862    wr_plain (dev, offsetof(amb_mem, interrupt), -1);
 863  }
 864  
 865  {
 866    unsigned int irq_work = 0;
 867    unsigned char pool;
 868    for (pool = 0; pool < NUM_RX_POOLS; ++pool)
 869      while (!rx_take (dev, pool))
 870        ++irq_work;
 871    while (!tx_take (dev))
 872      ++irq_work;
 873  
 874    if (irq_work) {
 875      fill_rx_pools (dev);
 876
 877      PRINTD (DBG_IRQ, "work done: %u", irq_work);
 878    } else {
 879      PRINTD (DBG_IRQ|DBG_WARN, "no work done");
 880    }
 881  }
 882  
 883  PRINTD (DBG_IRQ|DBG_FLOW, "interrupt_handler done: %p", dev_id);
 884  return IRQ_HANDLED;
 885}
 886
 887/********** make rate (not quite as much fun as Horizon) **********/
 888
 889static int make_rate (unsigned int rate, rounding r,
 890                      u16 * bits, unsigned int * actual) {
 891  unsigned char exp = -1; // hush gcc
 892  unsigned int man = -1;  // hush gcc
 893  
 894  PRINTD (DBG_FLOW|DBG_QOS, "make_rate %u", rate);
 895  
 896  // rates in cells per second, ITU format (nasty 16-bit floating-point)
 897  // given 5-bit e and 9-bit m:
 898  // rate = EITHER (1+m/2^9)*2^e    OR 0
 899  // bits = EITHER 1<<14 | e<<9 | m OR 0
 900  // (bit 15 is "reserved", bit 14 "non-zero")
 901  // smallest rate is 0 (special representation)
 902  // largest rate is (1+511/512)*2^31 = 4290772992 (< 2^32-1)
 903  // smallest non-zero rate is (1+0/512)*2^0 = 1 (> 0)
 904  // simple algorithm:
 905  // find position of top bit, this gives e
 906  // remove top bit and shift (rounding if feeling clever) by 9-e
 907  
 908  // ucode bug: please don't set bit 14! so 0 rate not representable
 909  
 910  if (rate > 0xffc00000U) {
 911    // larger than largest representable rate
 912    
 913    if (r == round_up) {
 914        return -EINVAL;
 915    } else {
 916      exp = 31;
 917      man = 511;
 918    }
 919    
 920  } else if (rate) {
 921    // representable rate
 922    
 923    exp = 31;
 924    man = rate;
 925    
 926    // invariant: rate = man*2^(exp-31)
 927    while (!(man & (1<<31))) {
 928      exp = exp - 1;
 929      man = man<<1;
 930    }
 931    
 932    // man has top bit set
 933    // rate = (2^31+(man-2^31))*2^(exp-31)
 934    // rate = (1+(man-2^31)/2^31)*2^exp
 935    man = man<<1;
 936    man &= 0xffffffffU; // a nop on 32-bit systems
 937    // rate = (1+man/2^32)*2^exp
 938    
 939    // exp is in the range 0 to 31, man is in the range 0 to 2^32-1
 940    // time to lose significance... we want m in the range 0 to 2^9-1
 941    // rounding presents a minor problem... we first decide which way
 942    // we are rounding (based on given rounding direction and possibly
 943    // the bits of the mantissa that are to be discarded).
 944    
 945    switch (r) {
 946      case round_down: {
 947        // just truncate
 948        man = man>>(32-9);
 949        break;
 950      }
 951      case round_up: {
 952        // check all bits that we are discarding
 953        if (man & (~0U>>9)) {
 954          man = (man>>(32-9)) + 1;
 955          if (man == (1<<9)) {
 956            // no need to check for round up outside of range
 957            man = 0;
 958            exp += 1;
 959          }
 960        } else {
 961          man = (man>>(32-9));
 962        }
 963        break;
 964      }
 965      case round_nearest: {
 966        // check msb that we are discarding
 967        if (man & (1<<(32-9-1))) {
 968          man = (man>>(32-9)) + 1;
 969          if (man == (1<<9)) {
 970            // no need to check for round up outside of range
 971            man = 0;
 972            exp += 1;
 973          }
 974        } else {
 975          man = (man>>(32-9));
 976        }
 977        break;
 978      }
 979    }
 980    
 981  } else {
 982    // zero rate - not representable
 983    
 984    if (r == round_down) {
 985      return -EINVAL;
 986    } else {
 987      exp = 0;
 988      man = 0;
 989    }
 990    
 991  }
 992  
 993  PRINTD (DBG_QOS, "rate: man=%u, exp=%hu", man, exp);
 994  
 995  if (bits)
 996    *bits = /* (1<<14) | */ (exp<<9) | man;
 997  
 998  if (actual)
 999    *actual = (exp >= 9)
1000      ? (1 << exp) + (man << (exp-9))
1001      : (1 << exp) + ((man + (1<<(9-exp-1))) >> (9-exp));
1002  
1003  return 0;
1004}
1005
1006/********** Linux ATM Operations **********/
1007
1008// some are not yet implemented while others do not make sense for
1009// this device
1010
1011/********** Open a VC **********/
1012
1013static int amb_open (struct atm_vcc * atm_vcc)
1014{
1015  int error;
1016  
1017  struct atm_qos * qos;
1018  struct atm_trafprm * txtp;
1019  struct atm_trafprm * rxtp;
1020  u16 tx_rate_bits = -1; // hush gcc
1021  u16 tx_vc_bits = -1; // hush gcc
1022  u16 tx_frame_bits = -1; // hush gcc
1023  
1024  amb_dev * dev = AMB_DEV(atm_vcc->dev);
1025  amb_vcc * vcc;
1026  unsigned char pool = -1; // hush gcc
1027  short vpi = atm_vcc->vpi;
1028  int vci = atm_vcc->vci;
1029  
1030  PRINTD (DBG_FLOW|DBG_VCC, "amb_open %x %x", vpi, vci);
1031  
1032#ifdef ATM_VPI_UNSPEC
1033  // UNSPEC is deprecated, remove this code eventually
1034  if (vpi == ATM_VPI_UNSPEC || vci == ATM_VCI_UNSPEC) {
1035    PRINTK (KERN_WARNING, "rejecting open with unspecified VPI/VCI (deprecated)");
1036    return -EINVAL;
1037  }
1038#endif
1039  
1040  if (!(0 <= vpi && vpi < (1<<NUM_VPI_BITS) &&
1041        0 <= vci && vci < (1<<NUM_VCI_BITS))) {
1042    PRINTD (DBG_WARN|DBG_VCC, "VPI/VCI out of range: %hd/%d", vpi, vci);
1043    return -EINVAL;
1044  }
1045  
1046  qos = &atm_vcc->qos;
1047  
1048  if (qos->aal != ATM_AAL5) {
1049    PRINTD (DBG_QOS, "AAL not supported");
1050    return -EINVAL;
1051  }
1052  
1053  // traffic parameters
1054  
1055  PRINTD (DBG_QOS, "TX:");
1056  txtp = &qos->txtp;
1057  if (txtp->traffic_class != ATM_NONE) {
1058    switch (txtp->traffic_class) {
1059      case ATM_UBR: {
1060        // we take "the PCR" as a rate-cap
1061        int pcr = atm_pcr_goal (txtp);
1062        if (!pcr) {
1063          // no rate cap
1064          tx_rate_bits = 0;
1065          tx_vc_bits = TX_UBR;
1066          tx_frame_bits = TX_FRAME_NOTCAP;
1067        } else {
1068          rounding r;
1069          if (pcr < 0) {
1070            r = round_down;
1071            pcr = -pcr;
1072          } else {
1073            r = round_up;
1074          }
1075          error = make_rate (pcr, r, &tx_rate_bits, NULL);
1076          if (error)
1077            return error;
1078          tx_vc_bits = TX_UBR_CAPPED;
1079          tx_frame_bits = TX_FRAME_CAPPED;
1080        }
1081        break;
1082      }
1083#if 0
1084      case ATM_ABR: {
1085        pcr = atm_pcr_goal (txtp);
1086        PRINTD (DBG_QOS, "pcr goal = %d", pcr);
1087        break;
1088      }
1089#endif
1090      default: {
1091        // PRINTD (DBG_QOS, "request for non-UBR/ABR denied");
1092        PRINTD (DBG_QOS, "request for non-UBR denied");
1093        return -EINVAL;
1094      }
1095    }
1096    PRINTD (DBG_QOS, "tx_rate_bits=%hx, tx_vc_bits=%hx",
1097            tx_rate_bits, tx_vc_bits);
1098  }
1099  
1100  PRINTD (DBG_QOS, "RX:");
1101  rxtp = &qos->rxtp;
1102  if (rxtp->traffic_class == ATM_NONE) {
1103    // do nothing
1104  } else {
1105    // choose an RX pool (arranged in increasing size)
1106    for (pool = 0; pool < NUM_RX_POOLS; ++pool)
1107      if ((unsigned int) rxtp->max_sdu <= dev->rxq[pool].buffer_size) {
1108        PRINTD (DBG_VCC|DBG_QOS|DBG_POOL, "chose pool %hu (max_sdu %u <= %u)",
1109                pool, rxtp->max_sdu, dev->rxq[pool].buffer_size);
1110        break;
1111      }
1112    if (pool == NUM_RX_POOLS) {
1113      PRINTD (DBG_WARN|DBG_VCC|DBG_QOS|DBG_POOL,
1114              "no pool suitable for VC (RX max_sdu %d is too large)",
1115              rxtp->max_sdu);
1116      return -EINVAL;
1117    }
1118    
1119    switch (rxtp->traffic_class) {
1120      case ATM_UBR: {
1121        break;
1122      }
1123#if 0
1124      case ATM_ABR: {
1125        pcr = atm_pcr_goal (rxtp);
1126        PRINTD (DBG_QOS, "pcr goal = %d", pcr);
1127        break;
1128      }
1129#endif
1130      default: {
1131        // PRINTD (DBG_QOS, "request for non-UBR/ABR denied");
1132        PRINTD (DBG_QOS, "request for non-UBR denied");
1133        return -EINVAL;
1134      }
1135    }
1136  }
1137  
1138  // get space for our vcc stuff
1139  vcc = kmalloc (sizeof(amb_vcc), GFP_KERNEL);
1140  if (!vcc) {
1141    PRINTK (KERN_ERR, "out of memory!");
1142    return -ENOMEM;
1143  }
1144  atm_vcc->dev_data = (void *) vcc;
1145  
1146  // no failures beyond this point
1147  
1148  // we are not really "immediately before allocating the connection
1149  // identifier in hardware", but it will just have to do!
1150  set_bit(ATM_VF_ADDR,&atm_vcc->flags);
1151  
1152  if (txtp->traffic_class != ATM_NONE) {
1153    command cmd;
1154    
1155    vcc->tx_frame_bits = tx_frame_bits;
1156    
1157    mutex_lock(&dev->vcc_sf);
1158    if (dev->rxer[vci]) {
1159      // RXer on the channel already, just modify rate...
1160      cmd.request = cpu_to_be32 (SRB_MODIFY_VC_RATE);
1161      cmd.args.modify_rate.vc = cpu_to_be32 (vci);  // vpi 0
1162      cmd.args.modify_rate.rate = cpu_to_be32 (tx_rate_bits << SRB_RATE_SHIFT);
1163      while (command_do (dev, &cmd))
1164        schedule();
1165      // ... and TX flags, preserving the RX pool
1166      cmd.request = cpu_to_be32 (SRB_MODIFY_VC_FLAGS);
1167      cmd.args.modify_flags.vc = cpu_to_be32 (vci);  // vpi 0
1168      cmd.args.modify_flags.flags = cpu_to_be32
1169        ( (AMB_VCC(dev->rxer[vci])->rx_info.pool << SRB_POOL_SHIFT)
1170          | (tx_vc_bits << SRB_FLAGS_SHIFT) );
1171      while (command_do (dev, &cmd))
1172        schedule();
1173    } else {
1174      // no RXer on the channel, just open (with pool zero)
1175      cmd.request = cpu_to_be32 (SRB_OPEN_VC);
1176      cmd.args.open.vc = cpu_to_be32 (vci);  // vpi 0
1177      cmd.args.open.flags = cpu_to_be32 (tx_vc_bits << SRB_FLAGS_SHIFT);
1178      cmd.args.open.rate = cpu_to_be32 (tx_rate_bits << SRB_RATE_SHIFT);
1179      while (command_do (dev, &cmd))
1180        schedule();
1181    }
1182    dev->txer[vci].tx_present = 1;
1183    mutex_unlock(&dev->vcc_sf);
1184  }
1185  
1186  if (rxtp->traffic_class != ATM_NONE) {
1187    command cmd;
1188    
1189    vcc->rx_info.pool = pool;
1190    
1191    mutex_lock(&dev->vcc_sf);
1192    /* grow RX buffer pool */
1193    if (!dev->rxq[pool].buffers_wanted)
1194      dev->rxq[pool].buffers_wanted = rx_lats;
1195    dev->rxq[pool].buffers_wanted += 1;
1196    fill_rx_pool (dev, pool, GFP_KERNEL);
1197    
1198    if (dev->txer[vci].tx_present) {
1199      // TXer on the channel already
1200      // switch (from pool zero) to this pool, preserving the TX bits
1201      cmd.request = cpu_to_be32 (SRB_MODIFY_VC_FLAGS);
1202      cmd.args.modify_flags.vc = cpu_to_be32 (vci);  // vpi 0
1203      cmd.args.modify_flags.flags = cpu_to_be32
1204        ( (pool << SRB_POOL_SHIFT)
1205          | (dev->txer[vci].tx_vc_bits << SRB_FLAGS_SHIFT) );
1206    } else {
1207      // no TXer on the channel, open the VC (with no rate info)
1208      cmd.request = cpu_to_be32 (SRB_OPEN_VC);
1209      cmd.args.open.vc = cpu_to_be32 (vci);  // vpi 0
1210      cmd.args.open.flags = cpu_to_be32 (pool << SRB_POOL_SHIFT);
1211      cmd.args.open.rate = cpu_to_be32 (0);
1212    }
1213    while (command_do (dev, &cmd))
1214      schedule();
1215    // this link allows RX frames through
1216    dev->rxer[vci] = atm_vcc;
1217    mutex_unlock(&dev->vcc_sf);
1218  }
1219  
1220  // indicate readiness
1221  set_bit(ATM_VF_READY,&atm_vcc->flags);
1222  
1223  return 0;
1224}
1225
1226/********** Close a VC **********/
1227
1228static void amb_close (struct atm_vcc * atm_vcc) {
1229  amb_dev * dev = AMB_DEV (atm_vcc->dev);
1230  amb_vcc * vcc = AMB_VCC (atm_vcc);
1231  u16 vci = atm_vcc->vci;
1232  
1233  PRINTD (DBG_VCC|DBG_FLOW, "amb_close");
1234  
1235  // indicate unreadiness
1236  clear_bit(ATM_VF_READY,&atm_vcc->flags);
1237  
1238  // disable TXing
1239  if (atm_vcc->qos.txtp.traffic_class != ATM_NONE) {
1240    command cmd;
1241    
1242    mutex_lock(&dev->vcc_sf);
1243    if (dev->rxer[vci]) {
1244      // RXer still on the channel, just modify rate... XXX not really needed
1245      cmd.request = cpu_to_be32 (SRB_MODIFY_VC_RATE);
1246      cmd.args.modify_rate.vc = cpu_to_be32 (vci);  // vpi 0
1247      cmd.args.modify_rate.rate = cpu_to_be32 (0);
1248      // ... and clear TX rate flags (XXX to stop RM cell output?), preserving RX pool
1249    } else {
1250      // no RXer on the channel, close channel
1251      cmd.request = cpu_to_be32 (SRB_CLOSE_VC);
1252      cmd.args.close.vc = cpu_to_be32 (vci); // vpi 0
1253    }
1254    dev->txer[vci].tx_present = 0;
1255    while (command_do (dev, &cmd))
1256      schedule();
1257    mutex_unlock(&dev->vcc_sf);
1258  }
1259  
1260  // disable RXing
1261  if (atm_vcc->qos.rxtp.traffic_class != ATM_NONE) {
1262    command cmd;
1263    
1264    // this is (the?) one reason why we need the amb_vcc struct
1265    unsigned char pool = vcc->rx_info.pool;
1266    
1267    mutex_lock(&dev->vcc_sf);
1268    if (dev->txer[vci].tx_present) {
1269      // TXer still on the channel, just go to pool zero XXX not really needed
1270      cmd.request = cpu_to_be32 (SRB_MODIFY_VC_FLAGS);
1271      cmd.args.modify_flags.vc = cpu_to_be32 (vci);  // vpi 0
1272      cmd.args.modify_flags.flags = cpu_to_be32
1273        (dev->txer[vci].tx_vc_bits << SRB_FLAGS_SHIFT);
1274    } else {
1275      // no TXer on the channel, close the VC
1276      cmd.request = cpu_to_be32 (SRB_CLOSE_VC);
1277      cmd.args.close.vc = cpu_to_be32 (vci); // vpi 0
1278    }
1279    // forget the rxer - no more skbs will be pushed
1280    if (atm_vcc != dev->rxer[vci])
1281      PRINTK (KERN_ERR, "%s vcc=%p rxer[vci]=%p",
1282              "arghhh! we're going to die!",
1283              vcc, dev->rxer[vci]);
1284    dev->rxer[vci] = NULL;
1285    while (command_do (dev, &cmd))
1286      schedule();
1287    
1288    /* shrink RX buffer pool */
1289    dev->rxq[pool].buffers_wanted -= 1;
1290    if (dev->rxq[pool].buffers_wanted == rx_lats) {
1291      dev->rxq[pool].buffers_wanted = 0;
1292      drain_rx_pool (dev, pool);
1293    }
1294    mutex_unlock(&dev->vcc_sf);
1295  }
1296  
1297  // free our structure
1298  kfree (vcc);
1299  
1300  // say the VPI/VCI is free again
1301  clear_bit(ATM_VF_ADDR,&atm_vcc->flags);
1302
1303  return;
1304}
1305
1306/********** Send **********/
1307
1308static int amb_send (struct atm_vcc * atm_vcc, struct sk_buff * skb) {
1309  amb_dev * dev = AMB_DEV(atm_vcc->dev);
1310  amb_vcc * vcc = AMB_VCC(atm_vcc);
1311  u16 vc = atm_vcc->vci;
1312  unsigned int tx_len = skb->len;
1313  unsigned char * tx_data = skb->data;
1314  tx_simple * tx_descr;
1315  tx_in tx;
1316  
1317  if (test_bit (dead, &dev->flags))
1318    return -EIO;
1319  
1320  PRINTD (DBG_FLOW|DBG_TX, "amb_send vc %x data %p len %u",
1321          vc, tx_data, tx_len);
1322  
1323  dump_skb (">>>", vc, skb);
1324  
1325  if (!dev->txer[vc].tx_present) {
1326    PRINTK (KERN_ERR, "attempt to send on RX-only VC %x", vc);
1327    return -EBADFD;
1328  }
1329  
1330  // this is a driver private field so we have to set it ourselves,
1331  // despite the fact that we are _required_ to use it to check for a
1332  // pop function
1333  ATM_SKB(skb)->vcc = atm_vcc;
1334  
1335  if (skb->len > (size_t) atm_vcc->qos.txtp.max_sdu) {
1336    PRINTK (KERN_ERR, "sk_buff length greater than agreed max_sdu, dropping...");
1337    return -EIO;
1338  }
1339  
1340  if (check_area (skb->data, skb->len)) {
1341    atomic_inc(&atm_vcc->stats->tx_err);
1342    return -ENOMEM; // ?
1343  }
1344  
1345  // allocate memory for fragments
1346  tx_descr = kmalloc (sizeof(tx_simple), GFP_KERNEL);
1347  if (!tx_descr) {
1348    PRINTK (KERN_ERR, "could not allocate TX descriptor");
1349    return -ENOMEM;
1350  }
1351  if (check_area (tx_descr, sizeof(tx_simple))) {
1352    kfree (tx_descr);
1353    return -ENOMEM;
1354  }
1355  PRINTD (DBG_TX, "fragment list allocated at %p", tx_descr);
1356  
1357  tx_descr->skb = skb;
1358  
1359  tx_descr->tx_frag.bytes = cpu_to_be32 (tx_len);
1360  tx_descr->tx_frag.address = cpu_to_be32 (virt_to_bus (tx_data));
1361  
1362  tx_descr->tx_frag_end.handle = virt_to_bus (tx_descr);
1363  tx_descr->tx_frag_end.vc = 0;
1364  tx_descr->tx_frag_end.next_descriptor_length = 0;
1365  tx_descr->tx_frag_end.next_descriptor = 0;
1366#ifdef AMB_NEW_MICROCODE
1367  tx_descr->tx_frag_end.cpcs_uu = 0;
1368  tx_descr->tx_frag_end.cpi = 0;
1369  tx_descr->tx_frag_end.pad = 0;
1370#endif
1371  
1372  tx.vc = cpu_to_be16 (vcc->tx_frame_bits | vc);
1373  tx.tx_descr_length = cpu_to_be16 (sizeof(tx_frag)+sizeof(tx_frag_end));
1374  tx.tx_descr_addr = cpu_to_be32 (virt_to_bus (&tx_descr->tx_frag));
1375  
1376  while (tx_give (dev, &tx))
1377    schedule();
1378  return 0;
1379}
1380
1381/********** Change QoS on a VC **********/
1382
1383// int amb_change_qos (struct atm_vcc * atm_vcc, struct atm_qos * qos, int flags);
1384
1385/********** Free RX Socket Buffer **********/
1386
1387#if 0
1388static void amb_free_rx_skb (struct atm_vcc * atm_vcc, struct sk_buff * skb) {
1389  amb_dev * dev = AMB_DEV (atm_vcc->dev);
1390  amb_vcc * vcc = AMB_VCC (atm_vcc);
1391  unsigned char pool = vcc->rx_info.pool;
1392  rx_in rx;
1393  
1394  // This may be unsafe for various reasons that I cannot really guess
1395  // at. However, I note that the ATM layer calls kfree_skb rather
1396  // than dev_kfree_skb at this point so we are least covered as far
1397  // as buffer locking goes. There may be bugs if pcap clones RX skbs.
1398
1399  PRINTD (DBG_FLOW|DBG_SKB, "amb_rx_free skb %p (atm_vcc %p, vcc %p)",
1400          skb, atm_vcc, vcc);
1401  
1402  rx.handle = virt_to_bus (skb);
1403  rx.host_address = cpu_to_be32 (virt_to_bus (skb->data));
1404  
1405  skb->data = skb->head;
1406  skb_reset_tail_pointer(skb);
1407  skb->len = 0;
1408  
1409  if (!rx_give (dev, &rx, pool)) {
1410    // success
1411    PRINTD (DBG_SKB|DBG_POOL, "recycled skb for pool %hu", pool);
1412    return;
1413  }
1414  
1415  // just do what the ATM layer would have done
1416  dev_kfree_skb_any (skb);
1417  
1418  return;
1419}
1420#endif
1421
1422/********** Proc File Output **********/
1423
1424static int amb_proc_read (struct atm_dev * atm_dev, loff_t * pos, char * page) {
1425  amb_dev * dev = AMB_DEV (atm_dev);
1426  int left = *pos;
1427  unsigned char pool;
1428  
1429  PRINTD (DBG_FLOW, "amb_proc_read");
1430  
1431  /* more diagnostics here? */
1432  
1433  if (!left--) {
1434    amb_stats * s = &dev->stats;
1435    return sprintf (page,
1436                    "frames: TX OK %lu, RX OK %lu, RX bad %lu "
1437                    "(CRC %lu, long %lu, aborted %lu, unused %lu).\n",
1438                    s->tx_ok, s->rx.ok, s->rx.error,
1439                    s->rx.badcrc, s->rx.toolong,
1440                    s->rx.aborted, s->rx.unused);
1441  }
1442  
1443  if (!left--) {
1444    amb_cq * c = &dev->cq;
1445    return sprintf (page, "cmd queue [cur/hi/max]: %u/%u/%u. ",
1446                    c->pending, c->high, c->maximum);
1447  }
1448  
1449  if (!left--) {
1450    amb_txq * t = &dev->txq;
1451    return sprintf (page, "TX queue [cur/max high full]: %u/%u %u %u.\n",
1452                    t->pending, t->maximum, t->high, t->filled);
1453  }
1454  
1455  if (!left--) {
1456    unsigned int count = sprintf (page, "RX queues [cur/max/req low empty]:");
1457    for (pool = 0; pool < NUM_RX_POOLS; ++pool) {
1458      amb_rxq * r = &dev->rxq[pool];
1459      count += sprintf (page+count, " %u/%u/%u %u %u",
1460                        r->pending, r->maximum, r->buffers_wanted, r->low, r->emptied);
1461    }
1462    count += sprintf (page+count, ".\n");
1463    return count;
1464  }
1465  
1466  if (!left--) {
1467    unsigned int count = sprintf (page, "RX buffer sizes:");
1468    for (pool = 0; pool < NUM_RX_POOLS; ++pool) {
1469      amb_rxq * r = &dev->rxq[pool];
1470      count += sprintf (page+count, " %u", r->buffer_size);
1471    }
1472    count += sprintf (page+count, ".\n");
1473    return count;
1474  }
1475  
1476#if 0
1477  if (!left--) {
1478    // suni block etc?
1479  }
1480#endif
1481  
1482  return 0;
1483}
1484
1485/********** Operation Structure **********/
1486
1487static const struct atmdev_ops amb_ops = {
1488  .open         = amb_open,
1489  .close        = amb_close,
1490  .send         = amb_send,
1491  .proc_read    = amb_proc_read,
1492  .owner        = THIS_MODULE,
1493};
1494
1495/********** housekeeping **********/
1496static void do_housekeeping (unsigned long arg) {
1497  amb_dev * dev = (amb_dev *) arg;
1498  
1499  // could collect device-specific (not driver/atm-linux) stats here
1500      
1501  // last resort refill once every ten seconds
1502  fill_rx_pools (dev);
1503  mod_timer(&dev->housekeeping, jiffies + 10*HZ);
1504  
1505  return;
1506}
1507
1508/********** creation of communication queues **********/
1509
1510static int create_queues(amb_dev *dev, unsigned int cmds, unsigned int txs,
1511                         unsigned int *rxs, unsigned int *rx_buffer_sizes)
1512{
1513  unsigned char pool;
1514  size_t total = 0;
1515  void * memory;
1516  void * limit;
1517  
1518  PRINTD (DBG_FLOW, "create_queues %p", dev);
1519  
1520  total += cmds * sizeof(command);
1521  
1522  total += txs * (sizeof(tx_in) + sizeof(tx_out));
1523  
1524  for (pool = 0; pool < NUM_RX_POOLS; ++pool)
1525    total += rxs[pool] * (sizeof(rx_in) + sizeof(rx_out));
1526  
1527  memory = kmalloc (total, GFP_KERNEL);
1528  if (!memory) {
1529    PRINTK (KERN_ERR, "could not allocate queues");
1530    return -ENOMEM;
1531  }
1532  if (check_area (memory, total)) {
1533    PRINTK (KERN_ERR, "queues allocated in nasty area");
1534    kfree (memory);
1535    return -ENOMEM;
1536  }
1537  
1538  limit = memory + total;
1539  PRINTD (DBG_INIT, "queues from %p to %p", memory, limit);
1540  
1541  PRINTD (DBG_CMD, "command queue at %p", memory);
1542  
1543  {
1544    command * cmd = memory;
1545    amb_cq * cq = &dev->cq;
1546    
1547    cq->pending = 0;
1548    cq->high = 0;
1549    cq->maximum = cmds - 1;
1550    
1551    cq->ptrs.start = cmd;
1552    cq->ptrs.in = cmd;
1553    cq->ptrs.out = cmd;
1554    cq->ptrs.limit = cmd + cmds;
1555    
1556    memory = cq->ptrs.limit;
1557  }
1558  
1559  PRINTD (DBG_TX, "TX queue pair at %p", memory);
1560  
1561  {
1562    tx_in * in = memory;
1563    tx_out * out;
1564    amb_txq * txq = &dev->txq;
1565    
1566    txq->pending = 0;
1567    txq->high = 0;
1568    txq->filled = 0;
1569    txq->maximum = txs - 1;
1570    
1571    txq->in.start = in;
1572    txq->in.ptr = in;
1573    txq->in.limit = in + txs;
1574    
1575    memory = txq->in.limit;
1576    out = memory;
1577    
1578    txq->out.start = out;
1579    txq->out.ptr = out;
1580    txq->out.limit = out + txs;
1581    
1582    memory = txq->out.limit;
1583  }
1584  
1585  PRINTD (DBG_RX, "RX queue pairs at %p", memory);
1586  
1587  for (pool = 0; pool < NUM_RX_POOLS; ++pool) {
1588    rx_in * in = memory;
1589    rx_out * out;
1590    amb_rxq * rxq = &dev->rxq[pool];
1591    
1592    rxq->buffer_size = rx_buffer_sizes[pool];
1593    rxq->buffers_wanted = 0;
1594    
1595    rxq->pending = 0;
1596    rxq->low = rxs[pool] - 1;
1597    rxq->emptied = 0;
1598    rxq->maximum = rxs[pool] - 1;
1599    
1600    rxq->in.start = in;
1601    rxq->in.ptr = in;
1602    rxq->in.limit = in + rxs[pool];
1603    
1604    memory = rxq->in.limit;
1605    out = memory;
1606    
1607    rxq->out.start = out;
1608    rxq->out.ptr = out;
1609    rxq->out.limit = out + rxs[pool];
1610    
1611    memory = rxq->out.limit;
1612  }
1613  
1614  if (memory == limit) {
1615    return 0;
1616  } else {
1617    PRINTK (KERN_ERR, "bad queue alloc %p != %p (tell maintainer)", memory, limit);
1618    kfree (limit - total);
1619    return -ENOMEM;
1620  }
1621  
1622}
1623
1624/********** destruction of communication queues **********/
1625
1626static void destroy_queues (amb_dev * dev) {
1627  // all queues assumed empty
1628  void * memory = dev->cq.ptrs.start;
1629  // includes txq.in, txq.out, rxq[].in and rxq[].out
1630  
1631  PRINTD (DBG_FLOW, "destroy_queues %p", dev);
1632  
1633  PRINTD (DBG_INIT, "freeing queues at %p", memory);
1634  kfree (memory);
1635  
1636  return;
1637}
1638
1639/********** basic loader commands and error handling **********/
1640// centisecond timeouts - guessing away here
1641static unsigned int command_timeouts [] = {
1642        [host_memory_test]     = 15,
1643        [read_adapter_memory]  = 2,
1644        [write_adapter_memory] = 2,
1645        [adapter_start]        = 50,
1646        [get_version_number]   = 10,
1647        [interrupt_host]       = 1,
1648        [flash_erase_sector]   = 1,
1649        [adap_download_block]  = 1,
1650        [adap_erase_flash]     = 1,
1651        [adap_run_in_iram]     = 1,
1652        [adap_end_download]    = 1
1653};
1654
1655
1656static unsigned int command_successes [] = {
1657        [host_memory_test]     = COMMAND_PASSED_TEST,
1658        [read_adapter_memory]  = COMMAND_READ_DATA_OK,
1659        [write_adapter_memory] = COMMAND_WRITE_DATA_OK,
1660        [adapter_start]        = COMMAND_COMPLETE,
1661        [get_version_number]   = COMMAND_COMPLETE,
1662        [interrupt_host]       = COMMAND_COMPLETE,
1663        [flash_erase_sector]   = COMMAND_COMPLETE,
1664        [adap_download_block]  = COMMAND_COMPLETE,
1665        [adap_erase_flash]     = COMMAND_COMPLETE,
1666        [adap_run_in_iram]     = COMMAND_COMPLETE,
1667        [adap_end_download]    = COMMAND_COMPLETE
1668};
1669  
1670static  int decode_loader_result (loader_command cmd, u32 result)
1671{
1672        int res;
1673        const char *msg;
1674
1675        if (result == command_successes[cmd])
1676                return 0;
1677
1678        switch (result) {
1679                case BAD_COMMAND:
1680                        res = -EINVAL;
1681                        msg = "bad command";
1682                        break;
1683                case COMMAND_IN_PROGRESS:
1684                        res = -ETIMEDOUT;
1685                        msg = "command in progress";
1686                        break;
1687                case COMMAND_PASSED_TEST:
1688                        res = 0;
1689                        msg = "command passed test";
1690                        break;
1691                case COMMAND_FAILED_TEST:
1692                        res = -EIO;
1693                        msg = "command failed test";
1694                        break;
1695                case COMMAND_READ_DATA_OK:
1696                        res = 0;
1697                        msg = "command read data ok";
1698                        break;
1699                case COMMAND_READ_BAD_ADDRESS:
1700                        res = -EINVAL;
1701                        msg = "command read bad address";
1702                        break;
1703                case COMMAND_WRITE_DATA_OK:
1704                        res = 0;
1705                        msg = "command write data ok";
1706                        break;
1707                case COMMAND_WRITE_BAD_ADDRESS:
1708                        res = -EINVAL;
1709                        msg = "command write bad address";
1710                        break;
1711                case COMMAND_WRITE_FLASH_FAILURE:
1712                        res = -EIO;
1713                        msg = "command write flash failure";
1714                        break;
1715                case COMMAND_COMPLETE:
1716                        res = 0;
1717                        msg = "command complete";
1718                        break;
1719                case COMMAND_FLASH_ERASE_FAILURE:
1720                        res = -EIO;
1721                        msg = "command flash erase failure";
1722                        break;
1723                case COMMAND_WRITE_BAD_DATA:
1724                        res = -EINVAL;
1725                        msg = "command write bad data";
1726                        break;
1727                default:
1728                        res = -EINVAL;
1729                        msg = "unknown error";
1730                        PRINTD (DBG_LOAD|DBG_ERR,
1731                                "decode_loader_result got %d=%x !",
1732                                result, result);
1733                        break;
1734        }
1735
1736        PRINTK (KERN_ERR, "%s", msg);
1737        return res;
1738}
1739
1740static int do_loader_command(volatile loader_block *lb, const amb_dev *dev,
1741                             loader_command cmd)
1742{
1743  
1744  unsigned long timeout;
1745  
1746  PRINTD (DBG_FLOW|DBG_LOAD, "do_loader_command");
1747  
1748  /* do a command
1749     
1750     Set the return value to zero, set the command type and set the
1751     valid entry to the right magic value. The payload is already
1752     correctly byte-ordered so we leave it alone. Hit the doorbell
1753     with the bus address of this structure.
1754     
1755  */
1756  
1757  lb->result = 0;
1758  lb->command = cpu_to_be32 (cmd);
1759  lb->valid = cpu_to_be32 (DMA_VALID);
1760  // dump_registers (dev);
1761  // dump_loader_block (lb);
1762  wr_mem (dev, offsetof(amb_mem, doorbell), virt_to_bus (lb) & ~onegigmask);
1763  
1764  timeout = command_timeouts[cmd] * 10;
1765  
1766  while (!lb->result || lb->result == cpu_to_be32 (COMMAND_IN_PROGRESS))
1767    if (timeout) {
1768      timeout = msleep_interruptible(timeout);
1769    } else {
1770      PRINTD (DBG_LOAD|DBG_ERR, "command %d timed out", cmd);
1771      dump_registers (dev);
1772      dump_loader_block (lb);
1773      return -ETIMEDOUT;
1774    }
1775  
1776  if (cmd == adapter_start) {
1777    // wait for start command to acknowledge...
1778    timeout = 100;
1779    while (rd_plain (dev, offsetof(amb_mem, doorbell)))
1780      if (timeout) {
1781        timeout = msleep_interruptible(timeout);
1782      } else {
1783        PRINTD (DBG_LOAD|DBG_ERR, "start command did not clear doorbell, res=%08x",
1784                be32_to_cpu (lb->result));
1785        dump_registers (dev);
1786        return -ETIMEDOUT;
1787      }
1788    return 0;
1789  } else {
1790    return decode_loader_result (cmd, be32_to_cpu (lb->result));
1791  }
1792  
1793}
1794
1795/* loader: determine loader version */
1796
1797static int get_loader_version(loader_block *lb, const amb_dev *dev,
1798                              u32 *version)
1799{
1800  int res;
1801  
1802  PRINTD (DBG_FLOW|DBG_LOAD, "get_loader_version");
1803  
1804  res = do_loader_command (lb, dev, get_version_number);
1805  if (res)
1806    return res;
1807  if (version)
1808    *version = be32_to_cpu (lb->payload.version);
1809  return 0;
1810}
1811
1812/* loader: write memory data blocks */
1813
1814static int loader_write(loader_block *lb, const amb_dev *dev,
1815                        const struct ihex_binrec *rec)
1816{
1817  transfer_block * tb = &lb->payload.transfer;
1818  
1819  PRINTD (DBG_FLOW|DBG_LOAD, "loader_write");
1820
1821  tb->address = rec->addr;
1822  tb->count = cpu_to_be32(be16_to_cpu(rec->len) / 4);
1823  memcpy(tb->data, rec->data, be16_to_cpu(rec->len));
1824  return do_loader_command (lb, dev, write_adapter_memory);
1825}
1826
1827/* loader: verify memory data blocks */
1828
1829static int loader_verify(loader_block *lb, const amb_dev *dev,
1830                         const struct ihex_binrec *rec)
1831{
1832  transfer_block * tb = &lb->payload.transfer;
1833  int res;
1834  
1835  PRINTD (DBG_FLOW|DBG_LOAD, "loader_verify");
1836  
1837  tb->address = rec->addr;
1838  tb->count = cpu_to_be32(be16_to_cpu(rec->len) / 4);
1839  res = do_loader_command (lb, dev, read_adapter_memory);
1840  if (!res && memcmp(tb->data, rec->data, be16_to_cpu(rec->len)))
1841    res = -EINVAL;
1842  return res;
1843}
1844
1845/* loader: start microcode */
1846
1847static int loader_start(loader_block *lb, const amb_dev *dev, u32 address)
1848{
1849  PRINTD (DBG_FLOW|DBG_LOAD, "loader_start");
1850  
1851  lb->payload.start = cpu_to_be32 (address);
1852  return do_loader_command (lb, dev, adapter_start);
1853}
1854
1855/********** reset card **********/
1856
1857static inline void sf (const char * msg)
1858{
1859        PRINTK (KERN_ERR, "self-test failed: %s", msg);
1860}
1861
1862static int amb_reset (amb_dev * dev, int diags) {
1863  u32 word;
1864  
1865  PRINTD (DBG_FLOW|DBG_LOAD, "amb_reset");
1866  
1867  word = rd_plain (dev, offsetof(amb_mem, reset_control));
1868  // put card into reset state
1869  wr_plain (dev, offsetof(amb_mem, reset_control), word | AMB_RESET_BITS);
1870  // wait a short while
1871  udelay (10);
1872#if 1
1873  // put card into known good state
1874  wr_plain (dev, offsetof(amb_mem, interrupt_control), AMB_DOORBELL_BITS);
1875  // clear all interrupts just in case
1876  wr_plain (dev, offsetof(amb_mem, interrupt), -1);
1877#endif
1878  // clear self-test done flag
1879  wr_plain (dev, offsetof(amb_mem, mb.loader.ready), 0);
1880  // take card out of reset state
1881  wr_plain (dev, offsetof(amb_mem, reset_control), word &~ AMB_RESET_BITS);
1882  
1883  if (diags) { 
1884    unsigned long timeout;
1885    // 4.2 second wait
1886    msleep(4200);
1887    // half second time-out
1888    timeout = 500;
1889    while (!rd_plain (dev, offsetof(amb_mem, mb.loader.ready)))
1890      if (timeout) {
1891        timeout = msleep_interruptible(timeout);
1892      } else {
1893        PRINTD (DBG_LOAD|DBG_ERR, "reset timed out");
1894        return -ETIMEDOUT;
1895      }
1896    
1897    // get results of self-test
1898    // XXX double check byte-order
1899    word = rd_mem (dev, offsetof(amb_mem, mb.loader.result));
1900    if (word & SELF_TEST_FAILURE) {
1901      if (word & GPINT_TST_FAILURE)
1902        sf ("interrupt");
1903      if (word & SUNI_DATA_PATTERN_FAILURE)
1904        sf ("SUNI data pattern");
1905      if (word & SUNI_DATA_BITS_FAILURE)
1906        sf ("SUNI data bits");
1907      if (word & SUNI_UTOPIA_FAILURE)
1908        sf ("SUNI UTOPIA interface");
1909      if (word & SUNI_FIFO_FAILURE)
1910        sf ("SUNI cell buffer FIFO");
1911      if (word & SRAM_FAILURE)
1912        sf ("bad SRAM");
1913      // better return value?
1914      return -EIO;
1915    }
1916    
1917  }
1918  return 0;
1919}
1920
1921/********** transfer and start the microcode **********/
1922
1923static int ucode_init(loader_block *lb, amb_dev *dev)
1924{
1925  const struct firmware *fw;
1926  unsigned long start_address;
1927  const struct ihex_binrec *rec;
1928  const char *errmsg = 0;
1929  int res;
1930
1931  res = request_ihex_firmware(&fw, "atmsar11.fw", &dev->pci_dev->dev);
1932  if (res) {
1933    PRINTK (KERN_ERR, "Cannot load microcode data");
1934    return res;
1935  }
1936
1937  /* First record contains just the start address */
1938  rec = (const struct ihex_binrec *)fw->data;
1939  if (be16_to_cpu(rec->len) != sizeof(__be32) || be32_to_cpu(rec->addr)) {
1940    errmsg = "no start record";
1941    goto fail;
1942  }
1943  start_address = be32_to_cpup((__be32 *)rec->data);
1944
1945  rec = ihex_next_binrec(rec);
1946
1947  PRINTD (DBG_FLOW|DBG_LOAD, "ucode_init");
1948
1949  while (rec) {
1950    PRINTD (DBG_LOAD, "starting region (%x, %u)", be32_to_cpu(rec->addr),
1951            be16_to_cpu(rec->len));
1952    if (be16_to_cpu(rec->len) > 4 * MAX_TRANSFER_DATA) {
1953            errmsg = "record too long";
1954            goto fail;
1955    }
1956    if (be16_to_cpu(rec->len) & 3) {
1957            errmsg = "odd number of bytes";
1958            goto fail;
1959    }
1960    res = loader_write(lb, dev, rec);
1961    if (res)
1962      break;
1963
1964    res = loader_verify(lb, dev, rec);
1965    if (res)
1966      break;
1967    rec = ihex_next_binrec(rec);
1968  }
1969  release_firmware(fw);
1970  if (!res)
1971    res = loader_start(lb, dev, start_address);
1972
1973  return res;
1974fail:
1975  release_firmware(fw);
1976  PRINTK(KERN_ERR, "Bad microcode data (%s)", errmsg);
1977  return -EINVAL;
1978}
1979
1980/********** give adapter parameters **********/
1981  
1982static inline __be32 bus_addr(void * addr) {
1983    return cpu_to_be32 (virt_to_bus (addr));
1984}
1985
1986static int amb_talk(amb_dev *dev)
1987{
1988  adap_talk_block a;
1989  unsigned char pool;
1990  unsigned long timeout;
1991  
1992  PRINTD (DBG_FLOW, "amb_talk %p", dev);
1993  
1994  a.command_start = bus_addr (dev->cq.ptrs.start);
1995  a.command_end   = bus_addr (dev->cq.ptrs.limit);
1996  a.tx_start      = bus_addr (dev->txq.in.start);
1997  a.tx_end        = bus_addr (dev->txq.in.limit);
1998  a.txcom_start   = bus_addr (dev->txq.out.start);
1999  a.txcom_end     = bus_addr (dev->txq.out.limit);
2000  
2001  for (pool = 0; pool < NUM_RX_POOLS; ++pool) {
2002    // the other "a" items are set up by the adapter
2003    a.rec_struct[pool].buffer_start = bus_addr (dev->rxq[pool].in.start);
2004    a.rec_struct[pool].buffer_end   = bus_addr (dev->rxq[pool].in.limit);
2005    a.rec_struct[pool].rx_start     = bus_addr (dev->rxq[pool].out.start);
2006    a.rec_struct[pool].rx_end       = bus_addr (dev->rxq[pool].out.limit);
2007    a.rec_struct[pool].buffer_size = cpu_to_be32 (dev->rxq[pool].buffer_size);
2008  }
2009  
2010#ifdef AMB_NEW_MICROCODE
2011  // disable fast PLX prefetching
2012  a.init_flags = 0;
2013#endif
2014  
2015  // pass the structure
2016  wr_mem (dev, offsetof(amb_mem, doorbell), virt_to_bus (&a));
2017  
2018  // 2.2 second wait (must not touch doorbell during 2 second DMA test)
2019  msleep(2200);
2020  // give the adapter another half second?
2021  timeout = 500;
2022  while (rd_plain (dev, offsetof(amb_mem, doorbell)))
2023    if (timeout) {
2024      timeout = msleep_interruptible(timeout);
2025    } else {
2026      PRINTD (DBG_INIT|DBG_ERR, "adapter init timed out");
2027      return -ETIMEDOUT;
2028    }
2029  
2030  return 0;
2031}
2032
2033// get microcode version
2034static void amb_ucode_version(amb_dev *dev)
2035{
2036  u32 major;
2037  u32 minor;
2038  command cmd;
2039  cmd.request = cpu_to_be32 (SRB_GET_VERSION);
2040  while (command_do (dev, &cmd)) {
2041    set_current_state(TASK_UNINTERRUPTIBLE);
2042    schedule();
2043  }
2044  major = be32_to_cpu (cmd.args.version.major);
2045  minor = be32_to_cpu (cmd.args.version.minor);
2046  PRINTK (KERN_INFO, "microcode version is %u.%u", major, minor);
2047}
2048  
2049// get end station address
2050static void amb_esi(amb_dev *dev, u8 *esi)
2051{
2052  u32 lower4;
2053  u16 upper2;
2054  command cmd;
2055  
2056  cmd.request = cpu_to_be32 (SRB_GET_BIA);
2057  while (command_do (dev, &cmd)) {
2058    set_current_state(TASK_UNINTERRUPTIBLE);
2059    schedule();
2060  }
2061  lower4 = be32_to_cpu (cmd.args.bia.lower4);
2062  upper2 = be32_to_cpu (cmd.args.bia.upper2);
2063  PRINTD (DBG_LOAD, "BIA: lower4: %08x, upper2 %04x", lower4, upper2);
2064  
2065  if (esi) {
2066    unsigned int i;
2067    
2068    PRINTDB (DBG_INIT, "ESI:");
2069    for (i = 0; i < ESI_LEN; ++i) {
2070      if (i < 4)
2071          esi[i] = bitrev8(lower4>>(8*i));
2072      else
2073          esi[i] = bitrev8(upper2>>(8*(i-4)));
2074      PRINTDM (DBG_INIT, " %02x", esi[i]);
2075    }
2076    
2077    PRINTDE (DBG_INIT, "");
2078  }
2079  
2080  return;
2081}
2082  
2083static void fixup_plx_window (amb_dev *dev, loader_block *lb)
2084{
2085        // fix up the PLX-mapped window base address to match the block
2086        unsigned long blb;
2087        u32 mapreg;
2088        blb = virt_to_bus(lb);
2089        // the kernel stack had better not ever cross a 1Gb boundary!
2090        mapreg = rd_plain (dev, offsetof(amb_mem, stuff[10]));
2091        mapreg &= ~onegigmask;
2092        mapreg |= blb & onegigmask;
2093        wr_plain (dev, offsetof(amb_mem, stuff[10]), mapreg);
2094        return;
2095}
2096
2097static int amb_init(amb_dev *dev)
2098{
2099  loader_block lb;
2100  
2101  u32 version;
2102  
2103  if (amb_reset (dev, 1)) {
2104    PRINTK (KERN_ERR, "card reset failed!");
2105  } else {
2106    fixup_plx_window (dev, &lb);
2107    
2108    if (get_loader_version (&lb, dev, &version)) {
2109      PRINTK (KERN_INFO, "failed to get loader version");
2110    } else {
2111      PRINTK (KERN_INFO, "loader version is %08x", version);
2112      
2113      if (ucode_init (&lb, dev)) {
2114        PRINTK (KERN_ERR, "microcode failure");
2115      } else if (create_queues (dev, cmds, txs, rxs, rxs_bs)) {
2116        PRINTK (KERN_ERR, "failed to get memory for queues");
2117      } else {
2118        
2119        if (amb_talk (dev)) {
2120          PRINTK (KERN_ERR, "adapter did not accept queues");
2121        } else {
2122          
2123          amb_ucode_version (dev);
2124          return 0;
2125          
2126        } /* amb_talk */
2127        
2128        destroy_queues (dev);
2129      } /* create_queues, ucode_init */
2130      
2131      amb_reset (dev, 0);
2132    } /* get_loader_version */
2133    
2134  } /* amb_reset */
2135  
2136  return -EINVAL;
2137}
2138
2139static void setup_dev(amb_dev *dev, struct pci_dev *pci_dev) 
2140{
2141      unsigned char pool;
2142      
2143      // set up known dev items straight away
2144      dev->pci_dev = pci_dev; 
2145      pci_set_drvdata(pci_dev, dev);
2146      
2147      dev->iobase = pci_resource_start (pci_dev, 1);
2148      dev->irq = pci_dev->irq; 
2149      dev->membase = bus_to_virt(pci_resource_start(pci_dev, 0));
2150      
2151      // flags (currently only dead)
2152      dev->flags = 0;
2153      
2154      // Allocate cell rates (fibre)
2155      // ATM_OC3_PCR = 1555200000/8/270*260/53 - 29/53
2156      // to be really pedantic, this should be ATM_OC3c_PCR
2157      dev->tx_avail = ATM_OC3_PCR;
2158      dev->rx_avail = ATM_OC3_PCR;
2159      
2160      // semaphore for txer/rxer modifications - we cannot use a
2161      // spinlock as the critical region needs to switch processes
2162      mutex_init(&dev->vcc_sf);
2163      // queue manipulation spinlocks; we want atomic reads and
2164      // writes to the queue descriptors (handles IRQ and SMP)
2165      // consider replacing "int pending" -> "atomic_t available"
2166      // => problem related to who gets to move queue pointers
2167      spin_lock_init (&dev->cq.lock);
2168      spin_lock_init (&dev->txq.lock);
2169      for (pool = 0; pool < NUM_RX_POOLS; ++pool)
2170        spin_lock_init (&dev->rxq[pool].lock);
2171}
2172
2173static void setup_pci_dev(struct pci_dev *pci_dev)
2174{
2175        unsigned char lat;
2176      
2177        // enable bus master accesses
2178        pci_set_master(pci_dev);
2179
2180        // frobnicate latency (upwards, usually)
2181        pci_read_config_byte (pci_dev, PCI_LATENCY_TIMER, &lat);
2182
2183        if (!pci_lat)
2184                pci_lat = (lat < MIN_PCI_LATENCY) ? MIN_PCI_LATENCY : lat;
2185
2186        if (lat != pci_lat) {
2187                PRINTK (KERN_INFO, "Changing PCI latency timer from %hu to %hu",
2188                        lat, pci_lat);
2189                pci_write_config_byte(pci_dev, PCI_LATENCY_TIMER, pci_lat);
2190        }
2191}
2192
2193static int amb_probe(struct pci_dev *pci_dev,
2194                     const struct pci_device_id *pci_ent)
2195{
2196        amb_dev * dev;
2197        int err;
2198        unsigned int irq;
2199      
2200        err = pci_enable_device(pci_dev);
2201        if (err < 0) {
2202                PRINTK (KERN_ERR, "skipped broken (PLX rev 2) card");
2203                goto out;
2204        }
2205
2206        // read resources from PCI configuration space
2207        irq = pci_dev->irq;
2208
2209        if (pci_dev->device == PCI_DEVICE_ID_MADGE_AMBASSADOR_BAD) {
2210                PRINTK (KERN_ERR, "skipped broken (PLX rev 2) card");
2211                err = -EINVAL;
2212                goto out_disable;
2213        }
2214
2215        PRINTD (DBG_INFO, "found Madge ATM adapter (amb) at"
2216                " IO %llx, IRQ %u, MEM %p",
2217                (unsigned long long)pci_resource_start(pci_dev, 1),
2218                irq, bus_to_virt(pci_resource_start(pci_dev, 0)));
2219
2220        // check IO region
2221        err = pci_request_region(pci_dev, 1, DEV_LABEL);
2222        if (err < 0) {
2223                PRINTK (KERN_ERR, "IO range already in use!");
2224                goto out_disable;
2225        }
2226
2227        dev = kzalloc(sizeof(amb_dev), GFP_KERNEL);
2228        if (!dev) {
2229                PRINTK (KERN_ERR, "out of memory!");
2230                err = -ENOMEM;
2231                goto out_release;
2232        }
2233
2234        setup_dev(dev, pci_dev);
2235
2236        err = amb_init(dev);
2237        if (err < 0) {
2238                PRINTK (KERN_ERR, "adapter initialisation failure");
2239                goto out_free;
2240        }
2241
2242        setup_pci_dev(pci_dev);
2243
2244        // grab (but share) IRQ and install handler
2245        err = request_irq(irq, interrupt_handler, IRQF_SHARED, DEV_LABEL, dev);
2246        if (err < 0) {
2247                PRINTK (KERN_ERR, "request IRQ failed!");
2248                goto out_reset;
2249        }
2250
2251        dev->atm_dev = atm_dev_register (DEV_LABEL, &pci_dev->dev, &amb_ops, -1,
2252                                         NULL);
2253        if (!dev->atm_dev) {
2254                PRINTD (DBG_ERR, "failed to register Madge ATM adapter");
2255                err = -EINVAL;
2256                goto out_free_irq;
2257        }
2258
2259        PRINTD (DBG_INFO, "registered Madge ATM adapter (no. %d) (%p) at %p",
2260                dev->atm_dev->number, dev, dev->atm_dev);
2261                dev->atm_dev->dev_data = (void *) dev;
2262
2263        // register our address
2264        amb_esi (dev, dev->atm_dev->esi);
2265
2266        // 0 bits for vpi, 10 bits for vci
2267        dev->atm_dev->ci_range.vpi_bits = NUM_VPI_BITS;
2268        dev->atm_dev->ci_range.vci_bits = NUM_VCI_BITS;
2269
2270        init_timer(&dev->housekeeping);
2271        dev->housekeeping.function = do_housekeeping;
2272        dev->housekeeping.data = (unsigned long) dev;
2273        mod_timer(&dev->housekeeping, jiffies);
2274
2275        // enable host interrupts
2276        interrupts_on (dev);
2277
2278out:
2279        return err;
2280
2281out_free_irq:
2282        free_irq(irq, dev);
2283out_reset:
2284        amb_reset(dev, 0);
2285out_free:
2286        kfree(dev);
2287out_release:
2288        pci_release_region(pci_dev, 1);
2289out_disable:
2290        pci_disable_device(pci_dev);
2291        goto out;
2292}
2293
2294
2295static void amb_remove_one(struct pci_dev *pci_dev)
2296{
2297        struct amb_dev *dev;
2298
2299        dev = pci_get_drvdata(pci_dev);
2300
2301        PRINTD(DBG_INFO|DBG_INIT, "closing %p (atm_dev = %p)", dev, dev->atm_dev);
2302        del_timer_sync(&dev->housekeeping);
2303        // the drain should not be necessary
2304        drain_rx_pools(dev);
2305        interrupts_off(dev);
2306        amb_reset(dev, 0);
2307        free_irq(dev->irq, dev);
2308        pci_disable_device(pci_dev);
2309        destroy_queues(dev);
2310        atm_dev_deregister(dev->atm_dev);
2311        kfree(dev);
2312        pci_release_region(pci_dev, 1);
2313}
2314
2315static void __init amb_check_args (void) {
2316  unsigned char pool;
2317  unsigned int max_rx_size;
2318  
2319#ifdef DEBUG_AMBASSADOR
2320  PRINTK (KERN_NOTICE, "debug bitmap is %hx", debug &= DBG_MASK);
2321#else
2322  if (debug)
2323    PRINTK (KERN_NOTICE, "no debugging support");
2324#endif
2325  
2326  if (cmds < MIN_QUEUE_SIZE)
2327    PRINTK (KERN_NOTICE, "cmds has been raised to %u",
2328            cmds = MIN_QUEUE_SIZE);
2329  
2330  if (txs < MIN_QUEUE_SIZE)
2331    PRINTK (KERN_NOTICE, "txs has been raised to %u",
2332            txs = MIN_QUEUE_SIZE);
2333  
2334  for (pool = 0; pool < NUM_RX_POOLS; ++pool)
2335    if (rxs[pool] < MIN_QUEUE_SIZE)
2336      PRINTK (KERN_NOTICE, "rxs[%hu] has been raised to %u",
2337              pool, rxs[pool] = MIN_QUEUE_SIZE);
2338  
2339  // buffers sizes should be greater than zero and strictly increasing
2340  max_rx_size = 0;
2341  for (pool = 0; pool < NUM_RX_POOLS; ++pool)
2342    if (rxs_bs[pool] <= max_rx_size)
2343      PRINTK (KERN_NOTICE, "useless pool (rxs_bs[%hu] = %u)",
2344              pool, rxs_bs[pool]);
2345    else
2346      max_rx_size = rxs_bs[pool];
2347  
2348  if (rx_lats < MIN_RX_BUFFERS)
2349    PRINTK (KERN_NOTICE, "rx_lats has been raised to %u",
2350            rx_lats = MIN_RX_BUFFERS);
2351  
2352  return;
2353}
2354
2355/********** module stuff **********/
2356
2357MODULE_AUTHOR(maintainer_string);
2358MODULE_DESCRIPTION(description_string);
2359MODULE_LICENSE("GPL");
2360MODULE_FIRMWARE("atmsar11.fw");
2361module_param(debug,   ushort, 0644);
2362module_param(cmds,    uint, 0);
2363module_param(txs,     uint, 0);
2364module_param_array(rxs,     uint, NULL, 0);
2365module_param_array(rxs_bs,  uint, NULL, 0);
2366module_param(rx_lats, uint, 0);
2367module_param(pci_lat, byte, 0);
2368MODULE_PARM_DESC(debug,   "debug bitmap, see .h file");
2369MODULE_PARM_DESC(cmds,    "number of command queue entries");
2370MODULE_PARM_DESC(txs,     "number of TX queue entries");
2371MODULE_PARM_DESC(rxs,     "number of RX queue entries [" __MODULE_STRING(NUM_RX_POOLS) "]");
2372MODULE_PARM_DESC(rxs_bs,  "size of RX buffers [" __MODULE_STRING(NUM_RX_POOLS) "]");
2373MODULE_PARM_DESC(rx_lats, "number of extra buffers to cope with RX latencies");
2374MODULE_PARM_DESC(pci_lat, "PCI latency in bus cycles");
2375
2376/********** module entry **********/
2377
2378static struct pci_device_id amb_pci_tbl[] = {
2379        { PCI_VDEVICE(MADGE, PCI_DEVICE_ID_MADGE_AMBASSADOR), 0 },
2380        { PCI_VDEVICE(MADGE, PCI_DEVICE_ID_MADGE_AMBASSADOR_BAD), 0 },
2381        { 0, }
2382};
2383
2384MODULE_DEVICE_TABLE(pci, amb_pci_tbl);
2385
2386static struct pci_driver amb_driver = {
2387        .name =         "amb",
2388        .probe =        amb_probe,
2389        .remove =       amb_remove_one,
2390        .id_table =     amb_pci_tbl,
2391};
2392
2393static int __init amb_module_init (void)
2394{
2395  PRINTD (DBG_FLOW|DBG_INIT, "init_module");
2396  
2397  // sanity check - cast needed as printk does not support %Zu
2398  if (sizeof(amb_mem) != 4*16 + 4*12) {
2399    PRINTK (KERN_ERR, "Fix amb_mem (is %lu words).",
2400            (unsigned long) sizeof(amb_mem));
2401    return -ENOMEM;
2402  }
2403  
2404  show_version();
2405  
2406  amb_check_args();
2407  
2408  // get the juice
2409  return pci_register_driver(&amb_driver);
2410}
2411
2412/********** module exit **********/
2413
2414static void __exit amb_module_exit (void)
2415{
2416  PRINTD (DBG_FLOW|DBG_INIT, "cleanup_module");
2417
2418  pci_unregister_driver(&amb_driver);
2419}
2420
2421module_init(amb_module_init);
2422module_exit(amb_module_exit);
2423