linux/drivers/atm/ambassador.c
   1/*
   2  Madge Ambassador ATM Adapter driver.
   3  Copyright (C) 1995-1999  Madge Networks Ltd.
   4
   5  This program is free software; you can redistribute it and/or modify
   6  it under the terms of the GNU General Public License as published by
   7  the Free Software Foundation; either version 2 of the License, or
   8  (at your option) any later version.
   9
  10  This program is distributed in the hope that it will be useful,
  11  but WITHOUT ANY WARRANTY; without even the implied warranty of
  12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13  GNU General Public License for more details.
  14
  15  You should have received a copy of the GNU General Public License
  16  along with this program; if not, write to the Free Software
  17  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  18
  19  The GNU GPL is contained in /usr/doc/copyright/GPL on a Debian
  20  system and in the file COPYING in the Linux kernel source.
  21*/
  22
  23/* * dedicated to the memory of Graham Gordon 1971-1998 * */
  24
  25#include <linux/module.h>
  26#include <linux/types.h>
  27#include <linux/pci.h>
  28#include <linux/kernel.h>
  29#include <linux/init.h>
  30#include <linux/ioport.h>
  31#include <linux/atmdev.h>
  32#include <linux/delay.h>
  33#include <linux/interrupt.h>
  34#include <linux/poison.h>
  35#include <linux/bitrev.h>
  36#include <linux/mutex.h>
  37#include <linux/firmware.h>
  38#include <linux/ihex.h>
  39
  40#include <asm/atomic.h>
  41#include <asm/io.h>
  42#include <asm/byteorder.h>
  43
  44#include "ambassador.h"
  45
  46#define maintainer_string "Giuliano Procida at Madge Networks <gprocida@madge.com>"
  47#define description_string "Madge ATM Ambassador driver"
  48#define version_string "1.2.4"
  49
  50static inline void __init show_version (void) {
  51  printk ("%s version %s\n", description_string, version_string);
  52}
  53
  54/*
  55  
  56  Theory of Operation
  57  
  58  I Hardware, detection, initialisation and shutdown.
  59  
  60  1. Supported Hardware
  61  
  62  This driver is for the PCI ATMizer-based Ambassador card (except
  63  very early versions). It is not suitable for the similar EISA "TR7"
  64  card. Commercially, both cards are known as Collage Server ATM
  65  adapters.
  66  
   67  The loader supports image transfer to the card, image start and a
   68  few other miscellaneous commands.
  69  
  70  Only AAL5 is supported with vpi = 0 and vci in the range 0 to 1023.
  71  
  72  The cards are big-endian.
  73  
  74  2. Detection
  75  
   76  Standard PCI stuff; the early cards are detected and rejected.
  77  
  78  3. Initialisation
  79  
  80  The cards are reset and the self-test results are checked. The
   81  microcode image is then transferred and started. The microcode waits
   82  for a pointer to a descriptor containing details of the host-based
   83  queues, buffers and various other parameters. Once these are
   84  processed, normal operations may begin. The BIA is read using a
   85  microcode command.
  86  
  87  4. Shutdown
  88  
  89  This may be accomplished either by a card reset or via the microcode
  90  shutdown command. Further investigation required.
  91  
  92  5. Persistent state
  93  
  94  The card reset does not affect PCI configuration (good) or the
  95  contents of several other "shared run-time registers" (bad) which
  96  include doorbell and interrupt control as well as EEPROM and PCI
  97  control. The driver must be careful when modifying these registers
  98  not to touch bits it does not use and to undo any changes at exit.
  99  
 100  II Driver software
 101  
 102  0. Generalities
 103  
 104  The adapter is quite intelligent (fast) and has a simple interface
  105  (few features). VPI is always zero; 1024 VCIs are supported. There
 106  is limited cell rate support. UBR channels can be capped and ABR
 107  (explicit rate, but not EFCI) is supported. There is no CBR or VBR
 108  support.
 109  
 110  1. Driver <-> Adapter Communication
 111  
 112  Apart from the basic loader commands, the driver communicates
 113  through three entities: the command queue (CQ), the transmit queue
 114  pair (TXQ) and the receive queue pairs (RXQ). These three entities
 115  are set up by the host and passed to the microcode just after it has
 116  been started.
 117  
 118  All queues are host-based circular queues. They are contiguous and
 119  (due to hardware limitations) have some restrictions as to their
 120  locations in (bus) memory. They are of the "full means the same as
 121  empty so don't do that" variety since the adapter uses pointers
 122  internally.
 123  
 124  The queue pairs work as follows: one queue is for supply to the
 125  adapter, items in it are pending and are owned by the adapter; the
 126  other is the queue for return from the adapter, items in it have
 127  been dealt with by the adapter. The host adds items to the supply
 128  (TX descriptors and free RX buffer descriptors) and removes items
 129  from the return (TX and RX completions). The adapter deals with out
 130  of order completions.
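       
       In code terms this becomes the *_give/*_take helpers further down:
       "give" copies an entry into the supply ring and advances the "in"
       pointer; "take" inspects the head of the return ring and advances
       "out". Roughly (a sketch only, simplified from tx_give and tx_take
       below):
       
         *txq->in.ptr = *tx;                  - give: hand over a descriptor
         txq->in.ptr = NEXTQ (...);           - advance, then ring the doorbell
         if (txq->out.ptr->handle)            - take: a completion is waiting
           tx_complete (dev, txq->out.ptr);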
 131  
 132  Interrupts (card to host) and the doorbell (host to card) are used
 133  for signalling.
 134  
 135  1. CQ
 136  
 137  This is to communicate "open VC", "close VC", "get stats" etc. to
 138  the adapter. At most one command is retired every millisecond by the
 139  card. There is no out of order completion or notification. The
 140  driver needs to check the return code of the command, waiting as
 141  appropriate.
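       
       The usage pattern (as in the VC open and close paths below) is
       roughly:
       
         command cmd;
         cmd.request = cpu_to_be32 (SRB_OPEN_VC);
         ... fill in cmd.args ...
         while (command_do (dev, &cmd))
           schedule();
         ... cmd now holds the adapter's reply ...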
 142  
 143  2. TXQ
 144  
 145  TX supply items are of variable length (scatter gather support) and
 146  so the queue items are (more or less) pointers to the real thing.
 147  Each TX supply item contains a unique, host-supplied handle (the skb
  148  bus address seems most sensible as this works for Alphas as well;
  149  there is no need to do any endian conversions on the handles).
 150  
 151  TX return items consist of just the handles above.
 152  
 153  3. RXQ (up to 4 of these with different lengths and buffer sizes)
 154  
 155  RX supply items consist of a unique, host-supplied handle (the skb
 156  bus address again) and a pointer to the buffer data area.
 157  
 158  RX return items consist of the handle above, the VC, length and a
 159  status word. This just screams "oh so easy" doesn't it?
 160
 161  Note on RX pool sizes:
 162   
 163  Each pool should have enough buffers to handle a back-to-back stream
 164  of minimum sized frames on a single VC. For example:
 165  
 166    frame spacing = 3us (about right)
 167    
 168    delay = IRQ lat + RX handling + RX buffer replenish = 20 (us)  (a guess)
 169    
 170    min number of buffers for one VC = 1 + delay/spacing (buffers)
 171
 172    delay/spacing = latency = (20+2)/3 = 7 (buffers)  (rounding up)
 173    
 174  The 20us delay assumes that there is no need to sleep; if we need to
 175  sleep to get buffers we are going to drop frames anyway.
 176  
 177  In fact, each pool should have enough buffers to support the
 178  simultaneous reassembly of a separate frame on each VC and cope with
 179  the case in which frames complete in round robin cell fashion on
 180  each VC.
 181  
 182  Only one frame can complete at each cell arrival, so if "n" VCs are
 183  open, the worst case is to have them all complete frames together
 184  followed by all starting new frames together.
 185  
 186    desired number of buffers = n + delay/spacing
 187    
  188  These are the extreme requirements; however, they are "n+k" for some
  189  "k", so we have only the constant to choose. This is the argument
  190  rx_lats, which currently defaults to 7.
 191  
 192  Actually, "n ? n+k : 0" is better and this is what is implemented,
 193  subject to the limit given by the pool size.
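       
       In other words (a sketch only; in the driver this is spread across
       amb_open, amb_close and fill_rx_pool rather than written in one
       place):
       
         buffers_wanted = open_vcs ? open_vcs + rx_lats : 0;
         ... and fill_rx_pool supplies at most
         min (buffers_wanted, pool maximum) buffers ...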
 194  
 195  4. Driver locking
 196  
 197  Simple spinlocks are used around the TX and RX queue mechanisms.
 198  Anyone with a faster, working method is welcome to implement it.
 199  
 200  The adapter command queue is protected with a spinlock. We always
 201  wait for commands to complete.
 202  
 203  A more complex form of locking is used around parts of the VC open
  204  and close functions. There are three reasons for a lock: 1. we need
  205  to do atomic rate reservation and release (not used yet); 2. opening
  206  sometimes involves two adapter commands which must not be separated
  207  by another command on the same VC; 3. the changes to RX pool size
  208  must be atomic. The lock needs to work over context switches, so we
  209  use a mutex (dev->vcc_sf).
 210  
 211  III Hardware Features and Microcode Bugs
 212  
 213  1. Byte Ordering
 214  
 215  *%^"$&%^$*&^"$(%^$#&^%$(&#%$*(&^#%!"!"!*!
 216  
 217  2. Memory access
 218  
 219  All structures that are not accessed using DMA must be 4-byte
 220  aligned (not a problem) and must not cross 4MB boundaries.
 221  
 222  There is a DMA memory hole at E0000000-E00000FF (groan).
 223  
 224  TX fragments (DMA read) must not cross 4MB boundaries (would be 16MB
 225  but for a hardware bug).
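       
       (For example, a 64KB TX fragment whose bus address is 0x003ff000
       would straddle the 4MB boundary at 0x00400000; as noted below, such
       a fragment is simply discarded and the TX fails.)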
 226  
 227  RX buffers (DMA write) must not cross 16MB boundaries and must
 228  include spare trailing bytes up to the next 4-byte boundary; they
 229  will be written with rubbish.
 230  
 231  The PLX likes to prefetch; if reading up to 4 u32 past the end of
 232  each TX fragment is not a problem, then TX can be made to go a
 233  little faster by passing a flag at init that disables a prefetch
 234  workaround. We do not pass this flag. (new microcode only)
 235  
 236  Now we:
  237  . Note that alloc_skb rounds up size to a 16-byte boundary.
 238  . Ensure all areas do not traverse 4MB boundaries.
  239  . Ensure all areas do not start at an E00000xx bus address.
 240  (I cannot be certain, but this may always hold with Linux)
 241  . Make all failures cause a loud message.
 242  . Discard non-conforming SKBs (causes TX failure or RX fill delay).
 243  . Discard non-conforming TX fragment descriptors (the TX fails).
 244  In the future we could:
 245  . Allow RX areas that traverse 4MB (but not 16MB) boundaries.
 246  . Segment TX areas into some/more fragments, when necessary.
 247  . Relax checks for non-DMA items (ignore hole).
 248  . Give scatter-gather (iovec) requirements using ???. (?)
 249  
 250  3. VC close is broken (only for new microcode)
 251  
 252  The VC close adapter microcode command fails to do anything if any
 253  frames have been received on the VC but none have been transmitted.
 254  Frames continue to be reassembled and passed (with IRQ) to the
 255  driver.
 256  
 257  IV To Do List
 258  
 259  . Fix bugs!
 260  
 261  . Timer code may be broken.
 262  
 263  . Deal with buggy VC close (somehow) in microcode 12.
 264  
 265  . Handle interrupted and/or non-blocking writes - is this a job for
 266    the protocol layer?
 267  
 268  . Add code to break up TX fragments when they span 4MB boundaries.
 269  
 270  . Add SUNI phy layer (need to know where SUNI lives on card).
 271  
 272  . Implement a tx_alloc fn to (a) satisfy TX alignment etc. and (b)
 273    leave extra headroom space for Ambassador TX descriptors.
 274  
 275  . Understand these elements of struct atm_vcc: recvq (proto?),
 276    sleep, callback, listenq, backlog_quota, reply and user_back.
 277  
 278  . Adjust TX/RX skb allocation to favour IP with LANE/CLIP (configurable).
 279  
 280  . Impose a TX-pending limit (2?) on each VC, help avoid TX q overflow.
 281  
 282  . Decide whether RX buffer recycling is or can be made completely safe;
 283    turn it back on. It looks like Werner is going to axe this.
 284  
 285  . Implement QoS changes on open VCs (involves extracting parts of VC open
 286    and close into separate functions and using them to make changes).
 287  
 288  . Hack on command queue so that someone can issue multiple commands and wait
 289    on the last one (OR only "no-op" or "wait" commands are waited for).
 290  
 291  . Eliminate need for while-schedule around do_command.
 292  
 293*/
 294
 295static void do_housekeeping (unsigned long arg);
 296/********** globals **********/
 297
 298static unsigned short debug = 0;
 299static unsigned int cmds = 8;
 300static unsigned int txs = 32;
 301static unsigned int rxs[NUM_RX_POOLS] = { 64, 64, 64, 64 };
 302static unsigned int rxs_bs[NUM_RX_POOLS] = { 4080, 12240, 36720, 65535 };
 303static unsigned int rx_lats = 7;
 304static unsigned char pci_lat = 0;
 305
 306static const unsigned long onegigmask = -1 << 30;
 307
 308/********** access to adapter **********/
 309
 310static inline void wr_plain (const amb_dev * dev, size_t addr, u32 data) {
 311  PRINTD (DBG_FLOW|DBG_REGS, "wr: %08zx <- %08x", addr, data);
 312#ifdef AMB_MMIO
 313  dev->membase[addr / sizeof(u32)] = data;
 314#else
 315  outl (data, dev->iobase + addr);
 316#endif
 317}
 318
 319static inline u32 rd_plain (const amb_dev * dev, size_t addr) {
 320#ifdef AMB_MMIO
 321  u32 data = dev->membase[addr / sizeof(u32)];
 322#else
 323  u32 data = inl (dev->iobase + addr);
 324#endif
 325  PRINTD (DBG_FLOW|DBG_REGS, "rd: %08zx -> %08x", addr, data);
 326  return data;
 327}
 328
 329static inline void wr_mem (const amb_dev * dev, size_t addr, u32 data) {
 330  __be32 be = cpu_to_be32 (data);
 331  PRINTD (DBG_FLOW|DBG_REGS, "wr: %08zx <- %08x b[%08x]", addr, data, be);
 332#ifdef AMB_MMIO
 333  dev->membase[addr / sizeof(u32)] = be;
 334#else
 335  outl (be, dev->iobase + addr);
 336#endif
 337}
 338
 339static inline u32 rd_mem (const amb_dev * dev, size_t addr) {
 340#ifdef AMB_MMIO
 341  __be32 be = dev->membase[addr / sizeof(u32)];
 342#else
 343  __be32 be = inl (dev->iobase + addr);
 344#endif
 345  u32 data = be32_to_cpu (be);
 346  PRINTD (DBG_FLOW|DBG_REGS, "rd: %08zx -> %08x b[%08x]", addr, data, be);
 347  return data;
 348}
 349
 350/********** dump routines **********/
 351
 352static inline void dump_registers (const amb_dev * dev) {
 353#ifdef DEBUG_AMBASSADOR
 354  if (debug & DBG_REGS) {
 355    size_t i;
 356    PRINTD (DBG_REGS, "reading PLX control: ");
 357    for (i = 0x00; i < 0x30; i += sizeof(u32))
 358      rd_mem (dev, i);
 359    PRINTD (DBG_REGS, "reading mailboxes: ");
 360    for (i = 0x40; i < 0x60; i += sizeof(u32))
 361      rd_mem (dev, i);
 362    PRINTD (DBG_REGS, "reading doorb irqev irqen reset:");
 363    for (i = 0x60; i < 0x70; i += sizeof(u32))
 364      rd_mem (dev, i);
 365  }
 366#else
 367  (void) dev;
 368#endif
 369  return;
 370}
 371
 372static inline void dump_loader_block (volatile loader_block * lb) {
 373#ifdef DEBUG_AMBASSADOR
 374  unsigned int i;
 375  PRINTDB (DBG_LOAD, "lb @ %p; res: %d, cmd: %d, pay:",
 376           lb, be32_to_cpu (lb->result), be32_to_cpu (lb->command));
 377  for (i = 0; i < MAX_COMMAND_DATA; ++i)
 378    PRINTDM (DBG_LOAD, " %08x", be32_to_cpu (lb->payload.data[i]));
 379  PRINTDE (DBG_LOAD, ", vld: %08x", be32_to_cpu (lb->valid));
 380#else
 381  (void) lb;
 382#endif
 383  return;
 384}
 385
 386static inline void dump_command (command * cmd) {
 387#ifdef DEBUG_AMBASSADOR
 388  unsigned int i;
 389  PRINTDB (DBG_CMD, "cmd @ %p, req: %08x, pars:",
 390           cmd, /*be32_to_cpu*/ (cmd->request));
 391  for (i = 0; i < 3; ++i)
 392    PRINTDM (DBG_CMD, " %08x", /*be32_to_cpu*/ (cmd->args.par[i]));
 393  PRINTDE (DBG_CMD, "");
 394#else
 395  (void) cmd;
 396#endif
 397  return;
 398}
 399
 400static inline void dump_skb (char * prefix, unsigned int vc, struct sk_buff * skb) {
 401#ifdef DEBUG_AMBASSADOR
 402  unsigned int i;
 403  unsigned char * data = skb->data;
 404  PRINTDB (DBG_DATA, "%s(%u) ", prefix, vc);
 405  for (i=0; i<skb->len && i < 256;i++)
 406    PRINTDM (DBG_DATA, "%02x ", data[i]);
 407  PRINTDE (DBG_DATA,"");
 408#else
 409  (void) prefix;
 410  (void) vc;
 411  (void) skb;
 412#endif
 413  return;
 414}
 415
 416/********** check memory areas for use by Ambassador **********/
 417
 418/* see limitations under Hardware Features */
 419
 420static int check_area (void * start, size_t length) {
 421  // assumes length > 0
 422  const u32 fourmegmask = -1 << 22;
 423  const u32 twofivesixmask = -1 << 8;
 424  const u32 starthole = 0xE0000000;
 425  u32 startaddress = virt_to_bus (start);
 426  u32 lastaddress = startaddress+length-1;
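       // the XOR has a bit set at or above bit 22 exactly when the start and
       // end fall in different 4MB regions; the second test catches a start
       // address anywhere in the E0000000-E00000FF DMA hole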
 427  if ((startaddress ^ lastaddress) & fourmegmask ||
 428      (startaddress & twofivesixmask) == starthole) {
 429    PRINTK (KERN_ERR, "check_area failure: [%x,%x] - mail maintainer!",
 430            startaddress, lastaddress);
 431    return -1;
 432  } else {
 433    return 0;
 434  }
 435}
 436
 437/********** free an skb (as per ATM device driver documentation) **********/
 438
 439static void amb_kfree_skb (struct sk_buff * skb) {
 440  if (ATM_SKB(skb)->vcc->pop) {
 441    ATM_SKB(skb)->vcc->pop (ATM_SKB(skb)->vcc, skb);
 442  } else {
 443    dev_kfree_skb_any (skb);
 444  }
 445}
 446
 447/********** TX completion **********/
 448
 449static void tx_complete (amb_dev * dev, tx_out * tx) {
 450  tx_simple * tx_descr = bus_to_virt (tx->handle);
 451  struct sk_buff * skb = tx_descr->skb;
 452  
 453  PRINTD (DBG_FLOW|DBG_TX, "tx_complete %p %p", dev, tx);
 454  
 455  // VC layer stats
 456  atomic_inc(&ATM_SKB(skb)->vcc->stats->tx);
 457  
 458  // free the descriptor
 459  kfree (tx_descr);
 460  
 461  // free the skb
 462  amb_kfree_skb (skb);
 463  
 464  dev->stats.tx_ok++;
 465  return;
 466}
 467
 468/********** RX completion **********/
 469
 470static void rx_complete (amb_dev * dev, rx_out * rx) {
 471  struct sk_buff * skb = bus_to_virt (rx->handle);
 472  u16 vc = be16_to_cpu (rx->vc);
 473  // unused: u16 lec_id = be16_to_cpu (rx->lec_id);
 474  u16 status = be16_to_cpu (rx->status);
 475  u16 rx_len = be16_to_cpu (rx->length);
 476  
 477  PRINTD (DBG_FLOW|DBG_RX, "rx_complete %p %p (len=%hu)", dev, rx, rx_len);
 478  
 479  // XXX move this in and add to VC stats ???
 480  if (!status) {
 481    struct atm_vcc * atm_vcc = dev->rxer[vc];
 482    dev->stats.rx.ok++;
 483    
 484    if (atm_vcc) {
 485      
 486      if (rx_len <= atm_vcc->qos.rxtp.max_sdu) {
 487        
 488        if (atm_charge (atm_vcc, skb->truesize)) {
 489          
 490          // prepare socket buffer
 491          ATM_SKB(skb)->vcc = atm_vcc;
 492          skb_put (skb, rx_len);
 493          
 494          dump_skb ("<<<", vc, skb);
 495          
 496          // VC layer stats
 497          atomic_inc(&atm_vcc->stats->rx);
 498          __net_timestamp(skb);
  499          // end of our responsibility
 500          atm_vcc->push (atm_vcc, skb);
 501          return;
 502          
 503        } else {
 504          // someone fix this (message), please!
 505          PRINTD (DBG_INFO|DBG_RX, "dropped thanks to atm_charge (vc %hu, truesize %u)", vc, skb->truesize);
 506          // drop stats incremented in atm_charge
 507        }
 508        
 509      } else {
 510        PRINTK (KERN_INFO, "dropped over-size frame");
 511        // should we count this?
 512        atomic_inc(&atm_vcc->stats->rx_drop);
 513      }
 514      
 515    } else {
 516      PRINTD (DBG_WARN|DBG_RX, "got frame but RX closed for channel %hu", vc);
 517      // this is an adapter bug, only in new version of microcode
 518    }
 519    
 520  } else {
 521    dev->stats.rx.error++;
 522    if (status & CRC_ERR)
 523      dev->stats.rx.badcrc++;
 524    if (status & LEN_ERR)
 525      dev->stats.rx.toolong++;
 526    if (status & ABORT_ERR)
 527      dev->stats.rx.aborted++;
 528    if (status & UNUSED_ERR)
 529      dev->stats.rx.unused++;
 530  }
 531  
 532  dev_kfree_skb_any (skb);
 533  return;
 534}
 535
 536/*
 537  
 538  Note on queue handling.
 539  
 540  Here "give" and "take" refer to queue entries and a queue (pair)
 541  rather than frames to or from the host or adapter. Empty frame
 542  buffers are given to the RX queue pair and returned unused or
 543  containing RX frames. TX frames (well, pointers to TX fragment
 544  lists) are given to the TX queue pair, completions are returned.
 545  
 546*/
 547
 548/********** command queue **********/
 549
 550// I really don't like this, but it's the best I can do at the moment
 551
 552// also, the callers are responsible for byte order as the microcode
 553// sometimes does 16-bit accesses (yuk yuk yuk)
 554
 555static int command_do (amb_dev * dev, command * cmd) {
 556  amb_cq * cq = &dev->cq;
 557  volatile amb_cq_ptrs * ptrs = &cq->ptrs;
 558  command * my_slot;
 559  
 560  PRINTD (DBG_FLOW|DBG_CMD, "command_do %p", dev);
 561  
 562  if (test_bit (dead, &dev->flags))
 563    return 0;
 564  
 565  spin_lock (&cq->lock);
 566  
 567  // if not full...
 568  if (cq->pending < cq->maximum) {
 569    // remember my slot for later
 570    my_slot = ptrs->in;
 571    PRINTD (DBG_CMD, "command in slot %p", my_slot);
 572    
 573    dump_command (cmd);
 574    
 575    // copy command in
 576    *ptrs->in = *cmd;
 577    cq->pending++;
 578    ptrs->in = NEXTQ (ptrs->in, ptrs->start, ptrs->limit);
 579    
 580    // mail the command
 581    wr_mem (dev, offsetof(amb_mem, mb.adapter.cmd_address), virt_to_bus (ptrs->in));
 582    
 583    if (cq->pending > cq->high)
 584      cq->high = cq->pending;
 585    spin_unlock (&cq->lock);
 586    
 587    // these comments were in a while-loop before, msleep removes the loop
 588    // go to sleep
 589    // PRINTD (DBG_CMD, "wait: sleeping %lu for command", timeout);
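         // sleep about one millisecond per pending command (including this
         // one); the adapter retires at most one command per millisecond
         // (see the Theory of Operation above)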
 590    msleep(cq->pending);
 591    
 592    // wait for my slot to be reached (all waiters are here or above, until...)
 593    while (ptrs->out != my_slot) {
 594      PRINTD (DBG_CMD, "wait: command slot (now at %p)", ptrs->out);
 595      set_current_state(TASK_UNINTERRUPTIBLE);
 596      schedule();
 597    }
 598    
 599    // wait on my slot (... one gets to its slot, and... )
 600    while (ptrs->out->request != cpu_to_be32 (SRB_COMPLETE)) {
 601      PRINTD (DBG_CMD, "wait: command slot completion");
 602      set_current_state(TASK_UNINTERRUPTIBLE);
 603      schedule();
 604    }
 605    
 606    PRINTD (DBG_CMD, "command complete");
 607    // update queue (... moves the queue along to the next slot)
 608    spin_lock (&cq->lock);
 609    cq->pending--;
 610    // copy command out
 611    *cmd = *ptrs->out;
 612    ptrs->out = NEXTQ (ptrs->out, ptrs->start, ptrs->limit);
 613    spin_unlock (&cq->lock);
 614    
 615    return 0;
 616  } else {
 617    cq->filled++;
 618    spin_unlock (&cq->lock);
 619    return -EAGAIN;
 620  }
 621  
 622}
 623
 624/********** TX queue pair **********/
 625
 626static int tx_give (amb_dev * dev, tx_in * tx) {
 627  amb_txq * txq = &dev->txq;
 628  unsigned long flags;
 629  
 630  PRINTD (DBG_FLOW|DBG_TX, "tx_give %p", dev);
 631
 632  if (test_bit (dead, &dev->flags))
 633    return 0;
 634  
 635  spin_lock_irqsave (&txq->lock, flags);
 636  
 637  if (txq->pending < txq->maximum) {
 638    PRINTD (DBG_TX, "TX in slot %p", txq->in.ptr);
 639
 640    *txq->in.ptr = *tx;
 641    txq->pending++;
 642    txq->in.ptr = NEXTQ (txq->in.ptr, txq->in.start, txq->in.limit);
 643    // hand over the TX and ring the bell
 644    wr_mem (dev, offsetof(amb_mem, mb.adapter.tx_address), virt_to_bus (txq->in.ptr));
 645    wr_mem (dev, offsetof(amb_mem, doorbell), TX_FRAME);
 646    
 647    if (txq->pending > txq->high)
 648      txq->high = txq->pending;
 649    spin_unlock_irqrestore (&txq->lock, flags);
 650    return 0;
 651  } else {
 652    txq->filled++;
 653    spin_unlock_irqrestore (&txq->lock, flags);
 654    return -EAGAIN;
 655  }
 656}
 657
 658static int tx_take (amb_dev * dev) {
 659  amb_txq * txq = &dev->txq;
 660  unsigned long flags;
 661  
 662  PRINTD (DBG_FLOW|DBG_TX, "tx_take %p", dev);
 663  
 664  spin_lock_irqsave (&txq->lock, flags);
 665  
 666  if (txq->pending && txq->out.ptr->handle) {
 667    // deal with TX completion
 668    tx_complete (dev, txq->out.ptr);
 669    // mark unused again
 670    txq->out.ptr->handle = 0;
 671    // remove item
 672    txq->pending--;
 673    txq->out.ptr = NEXTQ (txq->out.ptr, txq->out.start, txq->out.limit);
 674    
 675    spin_unlock_irqrestore (&txq->lock, flags);
 676    return 0;
 677  } else {
 678    
 679    spin_unlock_irqrestore (&txq->lock, flags);
 680    return -1;
 681  }
 682}
 683
 684/********** RX queue pairs **********/
 685
 686static int rx_give (amb_dev * dev, rx_in * rx, unsigned char pool) {
 687  amb_rxq * rxq = &dev->rxq[pool];
 688  unsigned long flags;
 689  
 690  PRINTD (DBG_FLOW|DBG_RX, "rx_give %p[%hu]", dev, pool);
 691  
 692  spin_lock_irqsave (&rxq->lock, flags);
 693  
 694  if (rxq->pending < rxq->maximum) {
 695    PRINTD (DBG_RX, "RX in slot %p", rxq->in.ptr);
 696
 697    *rxq->in.ptr = *rx;
 698    rxq->pending++;
 699    rxq->in.ptr = NEXTQ (rxq->in.ptr, rxq->in.start, rxq->in.limit);
 700    // hand over the RX buffer
 701    wr_mem (dev, offsetof(amb_mem, mb.adapter.rx_address[pool]), virt_to_bus (rxq->in.ptr));
 702    
 703    spin_unlock_irqrestore (&rxq->lock, flags);
 704    return 0;
 705  } else {
 706    spin_unlock_irqrestore (&rxq->lock, flags);
 707    return -1;
 708  }
 709}
 710
 711static int rx_take (amb_dev * dev, unsigned char pool) {
 712  amb_rxq * rxq = &dev->rxq[pool];
 713  unsigned long flags;
 714  
 715  PRINTD (DBG_FLOW|DBG_RX, "rx_take %p[%hu]", dev, pool);
 716  
 717  spin_lock_irqsave (&rxq->lock, flags);
 718  
 719  if (rxq->pending && (rxq->out.ptr->status || rxq->out.ptr->length)) {
 720    // deal with RX completion
 721    rx_complete (dev, rxq->out.ptr);
 722    // mark unused again
 723    rxq->out.ptr->status = 0;
 724    rxq->out.ptr->length = 0;
 725    // remove item
 726    rxq->pending--;
 727    rxq->out.ptr = NEXTQ (rxq->out.ptr, rxq->out.start, rxq->out.limit);
 728    
 729    if (rxq->pending < rxq->low)
 730      rxq->low = rxq->pending;
 731    spin_unlock_irqrestore (&rxq->lock, flags);
 732    return 0;
 733  } else {
 734    if (!rxq->pending && rxq->buffers_wanted)
 735      rxq->emptied++;
 736    spin_unlock_irqrestore (&rxq->lock, flags);
 737    return -1;
 738  }
 739}
 740
 741/********** RX Pool handling **********/
 742
 743/* pre: buffers_wanted = 0, post: pending = 0 */
 744static void drain_rx_pool (amb_dev * dev, unsigned char pool) {
 745  amb_rxq * rxq = &dev->rxq[pool];
 746  
 747  PRINTD (DBG_FLOW|DBG_POOL, "drain_rx_pool %p %hu", dev, pool);
 748  
 749  if (test_bit (dead, &dev->flags))
 750    return;
 751  
 752  /* we are not quite like the fill pool routines as we cannot just
 753     remove one buffer, we have to remove all of them, but we might as
 754     well pretend... */
 755  if (rxq->pending > rxq->buffers_wanted) {
 756    command cmd;
 757    cmd.request = cpu_to_be32 (SRB_FLUSH_BUFFER_Q);
 758    cmd.args.flush.flags = cpu_to_be32 (pool << SRB_POOL_SHIFT);
 759    while (command_do (dev, &cmd))
 760      schedule();
 761    /* the pool may also be emptied via the interrupt handler */
 762    while (rxq->pending > rxq->buffers_wanted)
 763      if (rx_take (dev, pool))
 764        schedule();
 765  }
 766  
 767  return;
 768}
 769
 770static void drain_rx_pools (amb_dev * dev) {
 771  unsigned char pool;
 772  
 773  PRINTD (DBG_FLOW|DBG_POOL, "drain_rx_pools %p", dev);
 774  
 775  for (pool = 0; pool < NUM_RX_POOLS; ++pool)
 776    drain_rx_pool (dev, pool);
 777}
 778
 779static void fill_rx_pool (amb_dev * dev, unsigned char pool,
 780                                 gfp_t priority)
 781{
 782  rx_in rx;
 783  amb_rxq * rxq;
 784  
 785  PRINTD (DBG_FLOW|DBG_POOL, "fill_rx_pool %p %hu %x", dev, pool, priority);
 786  
 787  if (test_bit (dead, &dev->flags))
 788    return;
 789  
 790  rxq = &dev->rxq[pool];
 791  while (rxq->pending < rxq->maximum && rxq->pending < rxq->buffers_wanted) {
 792    
 793    struct sk_buff * skb = alloc_skb (rxq->buffer_size, priority);
 794    if (!skb) {
 795      PRINTD (DBG_SKB|DBG_POOL, "failed to allocate skb for RX pool %hu", pool);
 796      return;
 797    }
 798    if (check_area (skb->data, skb->truesize)) {
 799      dev_kfree_skb_any (skb);
 800      return;
 801    }
 802    // cast needed as there is no %? for pointer differences
 803    PRINTD (DBG_SKB, "allocated skb at %p, head %p, area %li",
 804            skb, skb->head, (long) (skb_end_pointer(skb) - skb->head));
 805    rx.handle = virt_to_bus (skb);
 806    rx.host_address = cpu_to_be32 (virt_to_bus (skb->data));
 807    if (rx_give (dev, &rx, pool))
 808      dev_kfree_skb_any (skb);
 809    
 810  }
 811  
 812  return;
 813}
 814
 815// top up all RX pools (can also be called as a bottom half)
 816static void fill_rx_pools (amb_dev * dev) {
 817  unsigned char pool;
 818  
 819  PRINTD (DBG_FLOW|DBG_POOL, "fill_rx_pools %p", dev);
 820  
 821  for (pool = 0; pool < NUM_RX_POOLS; ++pool)
 822    fill_rx_pool (dev, pool, GFP_ATOMIC);
 823  
 824  return;
 825}
 826
 827/********** enable host interrupts **********/
 828
 829static void interrupts_on (amb_dev * dev) {
 830  wr_plain (dev, offsetof(amb_mem, interrupt_control),
 831            rd_plain (dev, offsetof(amb_mem, interrupt_control))
 832            | AMB_INTERRUPT_BITS);
 833}
 834
 835/********** disable host interrupts **********/
 836
 837static void interrupts_off (amb_dev * dev) {
 838  wr_plain (dev, offsetof(amb_mem, interrupt_control),
 839            rd_plain (dev, offsetof(amb_mem, interrupt_control))
 840            &~ AMB_INTERRUPT_BITS);
 841}
 842
 843/********** interrupt handling **********/
 844
 845static irqreturn_t interrupt_handler(int irq, void *dev_id) {
 846  amb_dev * dev = dev_id;
 847  
 848  PRINTD (DBG_IRQ|DBG_FLOW, "interrupt_handler: %p", dev_id);
 849  
 850  {
 851    u32 interrupt = rd_plain (dev, offsetof(amb_mem, interrupt));
 852  
 853    // for us or someone else sharing the same interrupt
 854    if (!interrupt) {
 855      PRINTD (DBG_IRQ, "irq not for me: %d", irq);
 856      return IRQ_NONE;
 857    }
 858    
 859    // definitely for us
 860    PRINTD (DBG_IRQ, "FYI: interrupt was %08x", interrupt);
 861    wr_plain (dev, offsetof(amb_mem, interrupt), -1);
 862  }
 863  
 864  {
 865    unsigned int irq_work = 0;
 866    unsigned char pool;
 867    for (pool = 0; pool < NUM_RX_POOLS; ++pool)
 868      while (!rx_take (dev, pool))
 869        ++irq_work;
 870    while (!tx_take (dev))
 871      ++irq_work;
 872  
 873    if (irq_work) {
 874#ifdef FILL_RX_POOLS_IN_BH
 875      schedule_work (&dev->bh);
 876#else
 877      fill_rx_pools (dev);
 878#endif
 879
 880      PRINTD (DBG_IRQ, "work done: %u", irq_work);
 881    } else {
 882      PRINTD (DBG_IRQ|DBG_WARN, "no work done");
 883    }
 884  }
 885  
 886  PRINTD (DBG_IRQ|DBG_FLOW, "interrupt_handler done: %p", dev_id);
 887  return IRQ_HANDLED;
 888}
 889
 890/********** make rate (not quite as much fun as Horizon) **********/
 891
 892static int make_rate (unsigned int rate, rounding r,
 893                      u16 * bits, unsigned int * actual) {
 894  unsigned char exp = -1; // hush gcc
 895  unsigned int man = -1;  // hush gcc
 896  
 897  PRINTD (DBG_FLOW|DBG_QOS, "make_rate %u", rate);
 898  
 899  // rates in cells per second, ITU format (nasty 16-bit floating-point)
 900  // given 5-bit e and 9-bit m:
 901  // rate = EITHER (1+m/2^9)*2^e    OR 0
 902  // bits = EITHER 1<<14 | e<<9 | m OR 0
 903  // (bit 15 is "reserved", bit 14 "non-zero")
 904  // smallest rate is 0 (special representation)
 905  // largest rate is (1+511/512)*2^31 = 4290772992 (< 2^32-1)
 906  // smallest non-zero rate is (1+0/512)*2^0 = 1 (> 0)
 907  // simple algorithm:
 908  // find position of top bit, this gives e
 909  // remove top bit and shift (rounding if feeling clever) by 9-e
 910  
 911  // ucode bug: please don't set bit 14! so 0 rate not representable
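       //
       // worked example: rate = 100000 with round_down has its top bit at
       // position 16, so exp = 16; the mantissa truncates to 269, giving
       // bits = (16<<9)|269 = 0x210d and an actual (granted) rate of
       // (1<<16) + (269<<7) = 99968 cells per second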
 912  
 913  if (rate > 0xffc00000U) {
 914    // larger than largest representable rate
 915    
 916    if (r == round_up) {
 917        return -EINVAL;
 918    } else {
 919      exp = 31;
 920      man = 511;
 921    }
 922    
 923  } else if (rate) {
 924    // representable rate
 925    
 926    exp = 31;
 927    man = rate;
 928    
 929    // invariant: rate = man*2^(exp-31)
 930    while (!(man & (1<<31))) {
 931      exp = exp - 1;
 932      man = man<<1;
 933    }
 934    
 935    // man has top bit set
 936    // rate = (2^31+(man-2^31))*2^(exp-31)
 937    // rate = (1+(man-2^31)/2^31)*2^exp
 938    man = man<<1;
 939    man &= 0xffffffffU; // a nop on 32-bit systems
 940    // rate = (1+man/2^32)*2^exp
 941    
 942    // exp is in the range 0 to 31, man is in the range 0 to 2^32-1
 943    // time to lose significance... we want m in the range 0 to 2^9-1
 944    // rounding presents a minor problem... we first decide which way
 945    // we are rounding (based on given rounding direction and possibly
 946    // the bits of the mantissa that are to be discarded).
 947    
 948    switch (r) {
 949      case round_down: {
 950        // just truncate
 951        man = man>>(32-9);
 952        break;
 953      }
 954      case round_up: {
 955        // check all bits that we are discarding
 956        if (man & (~0U>>9)) {
 957          man = (man>>(32-9)) + 1;
 958          if (man == (1<<9)) {
 959            // no need to check for round up outside of range
 960            man = 0;
 961            exp += 1;
 962          }
 963        } else {
 964          man = (man>>(32-9));
 965        }
 966        break;
 967      }
 968      case round_nearest: {
 969        // check msb that we are discarding
 970        if (man & (1<<(32-9-1))) {
 971          man = (man>>(32-9)) + 1;
 972          if (man == (1<<9)) {
 973            // no need to check for round up outside of range
 974            man = 0;
 975            exp += 1;
 976          }
 977        } else {
 978          man = (man>>(32-9));
 979        }
 980        break;
 981      }
 982    }
 983    
 984  } else {
 985    // zero rate - not representable
 986    
 987    if (r == round_down) {
 988      return -EINVAL;
 989    } else {
 990      exp = 0;
 991      man = 0;
 992    }
 993    
 994  }
 995  
 996  PRINTD (DBG_QOS, "rate: man=%u, exp=%hu", man, exp);
 997  
 998  if (bits)
 999    *bits = /* (1<<14) | */ (exp<<9) | man;
1000  
1001  if (actual)
1002    *actual = (exp >= 9)
1003      ? (1 << exp) + (man << (exp-9))
1004      : (1 << exp) + ((man + (1<<(9-exp-1))) >> (9-exp));
1005  
1006  return 0;
1007}
1008
1009/********** Linux ATM Operations **********/
1010
1011// some are not yet implemented while others do not make sense for
1012// this device
1013
1014/********** Open a VC **********/
1015
1016static int amb_open (struct atm_vcc * atm_vcc)
1017{
1018  int error;
1019  
1020  struct atm_qos * qos;
1021  struct atm_trafprm * txtp;
1022  struct atm_trafprm * rxtp;
1023  u16 tx_rate_bits = -1; // hush gcc
1024  u16 tx_vc_bits = -1; // hush gcc
1025  u16 tx_frame_bits = -1; // hush gcc
1026  
1027  amb_dev * dev = AMB_DEV(atm_vcc->dev);
1028  amb_vcc * vcc;
1029  unsigned char pool = -1; // hush gcc
1030  short vpi = atm_vcc->vpi;
1031  int vci = atm_vcc->vci;
1032  
1033  PRINTD (DBG_FLOW|DBG_VCC, "amb_open %x %x", vpi, vci);
1034  
1035#ifdef ATM_VPI_UNSPEC
1036  // UNSPEC is deprecated, remove this code eventually
1037  if (vpi == ATM_VPI_UNSPEC || vci == ATM_VCI_UNSPEC) {
1038    PRINTK (KERN_WARNING, "rejecting open with unspecified VPI/VCI (deprecated)");
1039    return -EINVAL;
1040  }
1041#endif
1042  
1043  if (!(0 <= vpi && vpi < (1<<NUM_VPI_BITS) &&
1044        0 <= vci && vci < (1<<NUM_VCI_BITS))) {
1045    PRINTD (DBG_WARN|DBG_VCC, "VPI/VCI out of range: %hd/%d", vpi, vci);
1046    return -EINVAL;
1047  }
1048  
1049  qos = &atm_vcc->qos;
1050  
1051  if (qos->aal != ATM_AAL5) {
1052    PRINTD (DBG_QOS, "AAL not supported");
1053    return -EINVAL;
1054  }
1055  
1056  // traffic parameters
1057  
1058  PRINTD (DBG_QOS, "TX:");
1059  txtp = &qos->txtp;
1060  if (txtp->traffic_class != ATM_NONE) {
1061    switch (txtp->traffic_class) {
1062      case ATM_UBR: {
1063        // we take "the PCR" as a rate-cap
1064        int pcr = atm_pcr_goal (txtp);
1065        if (!pcr) {
1066          // no rate cap
1067          tx_rate_bits = 0;
1068          tx_vc_bits = TX_UBR;
1069          tx_frame_bits = TX_FRAME_NOTCAP;
1070        } else {
1071          rounding r;
1072          if (pcr < 0) {
1073            r = round_down;
1074            pcr = -pcr;
1075          } else {
1076            r = round_up;
1077          }
1078          error = make_rate (pcr, r, &tx_rate_bits, NULL);
1079          if (error)
1080            return error;
1081          tx_vc_bits = TX_UBR_CAPPED;
1082          tx_frame_bits = TX_FRAME_CAPPED;
1083        }
1084        break;
1085      }
1086#if 0
1087      case ATM_ABR: {
1088        pcr = atm_pcr_goal (txtp);
1089        PRINTD (DBG_QOS, "pcr goal = %d", pcr);
1090        break;
1091      }
1092#endif
1093      default: {
1094        // PRINTD (DBG_QOS, "request for non-UBR/ABR denied");
1095        PRINTD (DBG_QOS, "request for non-UBR denied");
1096        return -EINVAL;
1097      }
1098    }
1099    PRINTD (DBG_QOS, "tx_rate_bits=%hx, tx_vc_bits=%hx",
1100            tx_rate_bits, tx_vc_bits);
1101  }
1102  
1103  PRINTD (DBG_QOS, "RX:");
1104  rxtp = &qos->rxtp;
1105  if (rxtp->traffic_class == ATM_NONE) {
1106    // do nothing
1107  } else {
1108    // choose an RX pool (arranged in increasing size)
1109    for (pool = 0; pool < NUM_RX_POOLS; ++pool)
1110      if ((unsigned int) rxtp->max_sdu <= dev->rxq[pool].buffer_size) {
1111        PRINTD (DBG_VCC|DBG_QOS|DBG_POOL, "chose pool %hu (max_sdu %u <= %u)",
1112                pool, rxtp->max_sdu, dev->rxq[pool].buffer_size);
1113        break;
1114      }
1115    if (pool == NUM_RX_POOLS) {
1116      PRINTD (DBG_WARN|DBG_VCC|DBG_QOS|DBG_POOL,
1117              "no pool suitable for VC (RX max_sdu %d is too large)",
1118              rxtp->max_sdu);
1119      return -EINVAL;
1120    }
1121    
1122    switch (rxtp->traffic_class) {
1123      case ATM_UBR: {
1124        break;
1125      }
1126#if 0
1127      case ATM_ABR: {
1128        pcr = atm_pcr_goal (rxtp);
1129        PRINTD (DBG_QOS, "pcr goal = %d", pcr);
1130        break;
1131      }
1132#endif
1133      default: {
1134        // PRINTD (DBG_QOS, "request for non-UBR/ABR denied");
1135        PRINTD (DBG_QOS, "request for non-UBR denied");
1136        return -EINVAL;
1137      }
1138    }
1139  }
1140  
1141  // get space for our vcc stuff
1142  vcc = kmalloc (sizeof(amb_vcc), GFP_KERNEL);
1143  if (!vcc) {
1144    PRINTK (KERN_ERR, "out of memory!");
1145    return -ENOMEM;
1146  }
1147  atm_vcc->dev_data = (void *) vcc;
1148  
1149  // no failures beyond this point
1150  
1151  // we are not really "immediately before allocating the connection
1152  // identifier in hardware", but it will just have to do!
1153  set_bit(ATM_VF_ADDR,&atm_vcc->flags);
1154  
1155  if (txtp->traffic_class != ATM_NONE) {
1156    command cmd;
1157    
1158    vcc->tx_frame_bits = tx_frame_bits;
1159    
1160    mutex_lock(&dev->vcc_sf);
1161    if (dev->rxer[vci]) {
1162      // RXer on the channel already, just modify rate...
1163      cmd.request = cpu_to_be32 (SRB_MODIFY_VC_RATE);
1164      cmd.args.modify_rate.vc = cpu_to_be32 (vci);  // vpi 0
1165      cmd.args.modify_rate.rate = cpu_to_be32 (tx_rate_bits << SRB_RATE_SHIFT);
1166      while (command_do (dev, &cmd))
1167        schedule();
1168      // ... and TX flags, preserving the RX pool
1169      cmd.request = cpu_to_be32 (SRB_MODIFY_VC_FLAGS);
1170      cmd.args.modify_flags.vc = cpu_to_be32 (vci);  // vpi 0
1171      cmd.args.modify_flags.flags = cpu_to_be32
1172        ( (AMB_VCC(dev->rxer[vci])->rx_info.pool << SRB_POOL_SHIFT)
1173          | (tx_vc_bits << SRB_FLAGS_SHIFT) );
1174      while (command_do (dev, &cmd))
1175        schedule();
1176    } else {
1177      // no RXer on the channel, just open (with pool zero)
1178      cmd.request = cpu_to_be32 (SRB_OPEN_VC);
1179      cmd.args.open.vc = cpu_to_be32 (vci);  // vpi 0
1180      cmd.args.open.flags = cpu_to_be32 (tx_vc_bits << SRB_FLAGS_SHIFT);
1181      cmd.args.open.rate = cpu_to_be32 (tx_rate_bits << SRB_RATE_SHIFT);
1182      while (command_do (dev, &cmd))
1183        schedule();
1184    }
1185    dev->txer[vci].tx_present = 1;
1186    mutex_unlock(&dev->vcc_sf);
1187  }
1188  
1189  if (rxtp->traffic_class != ATM_NONE) {
1190    command cmd;
1191    
1192    vcc->rx_info.pool = pool;
1193    
1194    mutex_lock(&dev->vcc_sf);
1195    /* grow RX buffer pool */
1196    if (!dev->rxq[pool].buffers_wanted)
1197      dev->rxq[pool].buffers_wanted = rx_lats;
1198    dev->rxq[pool].buffers_wanted += 1;
1199    fill_rx_pool (dev, pool, GFP_KERNEL);
1200    
1201    if (dev->txer[vci].tx_present) {
1202      // TXer on the channel already
1203      // switch (from pool zero) to this pool, preserving the TX bits
1204      cmd.request = cpu_to_be32 (SRB_MODIFY_VC_FLAGS);
1205      cmd.args.modify_flags.vc = cpu_to_be32 (vci);  // vpi 0
1206      cmd.args.modify_flags.flags = cpu_to_be32
1207        ( (pool << SRB_POOL_SHIFT)
1208          | (dev->txer[vci].tx_vc_bits << SRB_FLAGS_SHIFT) );
1209    } else {
1210      // no TXer on the channel, open the VC (with no rate info)
1211      cmd.request = cpu_to_be32 (SRB_OPEN_VC);
1212      cmd.args.open.vc = cpu_to_be32 (vci);  // vpi 0
1213      cmd.args.open.flags = cpu_to_be32 (pool << SRB_POOL_SHIFT);
1214      cmd.args.open.rate = cpu_to_be32 (0);
1215    }
1216    while (command_do (dev, &cmd))
1217      schedule();
1218    // this link allows RX frames through
1219    dev->rxer[vci] = atm_vcc;
1220    mutex_unlock(&dev->vcc_sf);
1221  }
1222  
1223  // indicate readiness
1224  set_bit(ATM_VF_READY,&atm_vcc->flags);
1225  
1226  return 0;
1227}
1228
1229/********** Close a VC **********/
1230
1231static void amb_close (struct atm_vcc * atm_vcc) {
1232  amb_dev * dev = AMB_DEV (atm_vcc->dev);
1233  amb_vcc * vcc = AMB_VCC (atm_vcc);
1234  u16 vci = atm_vcc->vci;
1235  
1236  PRINTD (DBG_VCC|DBG_FLOW, "amb_close");
1237  
1238  // indicate unreadiness
1239  clear_bit(ATM_VF_READY,&atm_vcc->flags);
1240  
1241  // disable TXing
1242  if (atm_vcc->qos.txtp.traffic_class != ATM_NONE) {
1243    command cmd;
1244    
1245    mutex_lock(&dev->vcc_sf);
1246    if (dev->rxer[vci]) {
1247      // RXer still on the channel, just modify rate... XXX not really needed
1248      cmd.request = cpu_to_be32 (SRB_MODIFY_VC_RATE);
1249      cmd.args.modify_rate.vc = cpu_to_be32 (vci);  // vpi 0
1250      cmd.args.modify_rate.rate = cpu_to_be32 (0);
1251      // ... and clear TX rate flags (XXX to stop RM cell output?), preserving RX pool
1252    } else {
1253      // no RXer on the channel, close channel
1254      cmd.request = cpu_to_be32 (SRB_CLOSE_VC);
1255      cmd.args.close.vc = cpu_to_be32 (vci); // vpi 0
1256    }
1257    dev->txer[vci].tx_present = 0;
1258    while (command_do (dev, &cmd))
1259      schedule();
1260    mutex_unlock(&dev->vcc_sf);
1261  }
1262  
1263  // disable RXing
1264  if (atm_vcc->qos.rxtp.traffic_class != ATM_NONE) {
1265    command cmd;
1266    
1267    // this is (the?) one reason why we need the amb_vcc struct
1268    unsigned char pool = vcc->rx_info.pool;
1269    
1270    mutex_lock(&dev->vcc_sf);
1271    if (dev->txer[vci].tx_present) {
1272      // TXer still on the channel, just go to pool zero XXX not really needed
1273      cmd.request = cpu_to_be32 (SRB_MODIFY_VC_FLAGS);
1274      cmd.args.modify_flags.vc = cpu_to_be32 (vci);  // vpi 0
1275      cmd.args.modify_flags.flags = cpu_to_be32
1276        (dev->txer[vci].tx_vc_bits << SRB_FLAGS_SHIFT);
1277    } else {
1278      // no TXer on the channel, close the VC
1279      cmd.request = cpu_to_be32 (SRB_CLOSE_VC);
1280      cmd.args.close.vc = cpu_to_be32 (vci); // vpi 0
1281    }
1282    // forget the rxer - no more skbs will be pushed
1283    if (atm_vcc != dev->rxer[vci])
1284      PRINTK (KERN_ERR, "%s vcc=%p rxer[vci]=%p",
1285              "arghhh! we're going to die!",
1286              vcc, dev->rxer[vci]);
1287    dev->rxer[vci] = NULL;
1288    while (command_do (dev, &cmd))
1289      schedule();
1290    
1291    /* shrink RX buffer pool */
1292    dev->rxq[pool].buffers_wanted -= 1;
1293    if (dev->rxq[pool].buffers_wanted == rx_lats) {
1294      dev->rxq[pool].buffers_wanted = 0;
1295      drain_rx_pool (dev, pool);
1296    }
1297    mutex_unlock(&dev->vcc_sf);
1298  }
1299  
1300  // free our structure
1301  kfree (vcc);
1302  
1303  // say the VPI/VCI is free again
1304  clear_bit(ATM_VF_ADDR,&atm_vcc->flags);
1305
1306  return;
1307}
1308
1309/********** Send **********/
1310
1311static int amb_send (struct atm_vcc * atm_vcc, struct sk_buff * skb) {
1312  amb_dev * dev = AMB_DEV(atm_vcc->dev);
1313  amb_vcc * vcc = AMB_VCC(atm_vcc);
1314  u16 vc = atm_vcc->vci;
1315  unsigned int tx_len = skb->len;
1316  unsigned char * tx_data = skb->data;
1317  tx_simple * tx_descr;
1318  tx_in tx;
1319  
1320  if (test_bit (dead, &dev->flags))
1321    return -EIO;
1322  
1323  PRINTD (DBG_FLOW|DBG_TX, "amb_send vc %x data %p len %u",
1324          vc, tx_data, tx_len);
1325  
1326  dump_skb (">>>", vc, skb);
1327  
1328  if (!dev->txer[vc].tx_present) {
1329    PRINTK (KERN_ERR, "attempt to send on RX-only VC %x", vc);
1330    return -EBADFD;
1331  }
1332  
1333  // this is a driver private field so we have to set it ourselves,
1334  // despite the fact that we are _required_ to use it to check for a
1335  // pop function
1336  ATM_SKB(skb)->vcc = atm_vcc;
1337  
1338  if (skb->len > (size_t) atm_vcc->qos.txtp.max_sdu) {
1339    PRINTK (KERN_ERR, "sk_buff length greater than agreed max_sdu, dropping...");
1340    return -EIO;
1341  }
1342  
1343  if (check_area (skb->data, skb->len)) {
1344    atomic_inc(&atm_vcc->stats->tx_err);
1345    return -ENOMEM; // ?
1346  }
1347  
1348  // allocate memory for fragments
1349  tx_descr = kmalloc (sizeof(tx_simple), GFP_KERNEL);
1350  if (!tx_descr) {
1351    PRINTK (KERN_ERR, "could not allocate TX descriptor");
1352    return -ENOMEM;
1353  }
1354  if (check_area (tx_descr, sizeof(tx_simple))) {
1355    kfree (tx_descr);
1356    return -ENOMEM;
1357  }
1358  PRINTD (DBG_TX, "fragment list allocated at %p", tx_descr);
1359  
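       // build the simplest possible fragment list: a single tx_frag
       // covering the whole of skb->data, terminated by a tx_frag_end that
       // carries our handle (the bus address of this descriptor)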
1360  tx_descr->skb = skb;
1361  
1362  tx_descr->tx_frag.bytes = cpu_to_be32 (tx_len);
1363  tx_descr->tx_frag.address = cpu_to_be32 (virt_to_bus (tx_data));
1364  
1365  tx_descr->tx_frag_end.handle = virt_to_bus (tx_descr);
1366  tx_descr->tx_frag_end.vc = 0;
1367  tx_descr->tx_frag_end.next_descriptor_length = 0;
1368  tx_descr->tx_frag_end.next_descriptor = 0;
1369#ifdef AMB_NEW_MICROCODE
1370  tx_descr->tx_frag_end.cpcs_uu = 0;
1371  tx_descr->tx_frag_end.cpi = 0;
1372  tx_descr->tx_frag_end.pad = 0;
1373#endif
1374  
1375  tx.vc = cpu_to_be16 (vcc->tx_frame_bits | vc);
1376  tx.tx_descr_length = cpu_to_be16 (sizeof(tx_frag)+sizeof(tx_frag_end));
1377  tx.tx_descr_addr = cpu_to_be32 (virt_to_bus (&tx_descr->tx_frag));
1378  
1379  while (tx_give (dev, &tx))
1380    schedule();
1381  return 0;
1382}
1383
1384/********** Change QoS on a VC **********/
1385
1386// int amb_change_qos (struct atm_vcc * atm_vcc, struct atm_qos * qos, int flags);
1387
1388/********** Free RX Socket Buffer **********/
1389
1390#if 0
1391static void amb_free_rx_skb (struct atm_vcc * atm_vcc, struct sk_buff * skb) {
1392  amb_dev * dev = AMB_DEV (atm_vcc->dev);
1393  amb_vcc * vcc = AMB_VCC (atm_vcc);
1394  unsigned char pool = vcc->rx_info.pool;
1395  rx_in rx;
1396  
1397  // This may be unsafe for various reasons that I cannot really guess
1398  // at. However, I note that the ATM layer calls kfree_skb rather
 1399  // than dev_kfree_skb at this point so we are at least covered as far
1400  // as buffer locking goes. There may be bugs if pcap clones RX skbs.
1401
1402  PRINTD (DBG_FLOW|DBG_SKB, "amb_rx_free skb %p (atm_vcc %p, vcc %p)",
1403          skb, atm_vcc, vcc);
1404  
1405  rx.handle = virt_to_bus (skb);
1406  rx.host_address = cpu_to_be32 (virt_to_bus (skb->data));
1407  
1408  skb->data = skb->head;
1409  skb->tail = skb->head;
1410  skb->len = 0;
1411  
1412  if (!rx_give (dev, &rx, pool)) {
1413    // success
1414    PRINTD (DBG_SKB|DBG_POOL, "recycled skb for pool %hu", pool);
1415    return;
1416  }
1417  
1418  // just do what the ATM layer would have done
1419  dev_kfree_skb_any (skb);
1420  
1421  return;
1422}
1423#endif
1424
1425/********** Proc File Output **********/
1426
1427static int amb_proc_read (struct atm_dev * atm_dev, loff_t * pos, char * page) {
1428  amb_dev * dev = AMB_DEV (atm_dev);
1429  int left = *pos;
1430  unsigned char pool;
1431  
1432  PRINTD (DBG_FLOW, "amb_proc_read");
1433  
1434  /* more diagnostics here? */
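       /* each call returns the single line selected by *pos; returning 0
          once *pos runs past the last line marks the end of the output */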
1435  
1436  if (!left--) {
1437    amb_stats * s = &dev->stats;
1438    return sprintf (page,
1439                    "frames: TX OK %lu, RX OK %lu, RX bad %lu "
1440                    "(CRC %lu, long %lu, aborted %lu, unused %lu).\n",
1441                    s->tx_ok, s->rx.ok, s->rx.error,
1442                    s->rx.badcrc, s->rx.toolong,
1443                    s->rx.aborted, s->rx.unused);
1444  }
1445  
1446  if (!left--) {
1447    amb_cq * c = &dev->cq;
1448    return sprintf (page, "cmd queue [cur/hi/max]: %u/%u/%u. ",
1449                    c->pending, c->high, c->maximum);
1450  }
1451  
1452  if (!left--) {
1453    amb_txq * t = &dev->txq;
1454    return sprintf (page, "TX queue [cur/max high full]: %u/%u %u %u.\n",
1455                    t->pending, t->maximum, t->high, t->filled);
1456  }
1457  
1458  if (!left--) {
1459    unsigned int count = sprintf (page, "RX queues [cur/max/req low empty]:");
1460    for (pool = 0; pool < NUM_RX_POOLS; ++pool) {
1461      amb_rxq * r = &dev->rxq[pool];
1462      count += sprintf (page+count, " %u/%u/%u %u %u",
1463                        r->pending, r->maximum, r->buffers_wanted, r->low, r->emptied);
1464    }
1465    count += sprintf (page+count, ".\n");
1466    return count;
1467  }
1468  
1469  if (!left--) {
1470    unsigned int count = sprintf (page, "RX buffer sizes:");
1471    for (pool = 0; pool < NUM_RX_POOLS; ++pool) {
1472      amb_rxq * r = &dev->rxq[pool];
1473      count += sprintf (page+count, " %u", r->buffer_size);
1474    }
1475    count += sprintf (page+count, ".\n");
1476    return count;
1477  }
1478  
1479#if 0
1480  if (!left--) {
1481    // suni block etc?
1482  }
1483#endif
1484  
1485  return 0;
1486}
1487
1488/********** Operation Structure **********/
1489
1490static const struct atmdev_ops amb_ops = {
1491  .open         = amb_open,
1492  .close        = amb_close,
1493  .send         = amb_send,
1494  .proc_read    = amb_proc_read,
1495  .owner        = THIS_MODULE,
1496};
1497
1498/********** housekeeping **********/
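     // timer callback: "arg" carries the amb_dev pointer (as an unsigned
     // long); the function re-arms dev->housekeeping for ten seconds' time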
1499static void do_housekeeping (unsigned long arg) {
1500  amb_dev * dev = (amb_dev *) arg;
1501  
1502  // could collect device-specific (not driver/atm-linux) stats here
1503      
1504  // last resort refill once every ten seconds
1505  fill_rx_pools (dev);
1506  mod_timer(&dev->housekeeping, jiffies + 10*HZ);
1507  
1508  return;
1509}
1510
1511/********** creation of communication queues **********/
1512
1513static int __devinit create_queues (amb_dev * dev, unsigned int cmds,
1514                                 unsigned int txs, unsigned int * rxs,
1515                                 unsigned int * rx_buffer_sizes) {
1516  unsigned char pool;
1517  size_t total = 0;
1518  void * memory;
1519  void * limit;
1520  
1521  PRINTD (DBG_FLOW, "create_queues %p", dev);
1522  
1523  total += cmds * sizeof(command);
1524  
1525  total += txs * (sizeof(tx_in) + sizeof(tx_out));
1526  
1527  for (pool = 0; pool < NUM_RX_POOLS; ++pool)
1528    total += rxs[pool] * (sizeof(rx_in) + sizeof(rx_out));
1529  
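       // a single contiguous allocation holds, in this order: the command
       // ring, the TX in and out rings, and an in and out ring for each RX
       // pool; the code below just carves it up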
1530  memory = kmalloc (total, GFP_KERNEL);
1531  if (!memory) {
1532    PRINTK (KERN_ERR, "could not allocate queues");
1533    return -ENOMEM;
1534  }
1535  if (check_area (memory, total)) {
1536    PRINTK (KERN_ERR, "queues allocated in nasty area");
1537    kfree (memory);
1538    return -ENOMEM;
1539  }
1540  
1541  limit = memory + total;
1542  PRINTD (DBG_INIT, "queues from %p to %p", memory, limit);
1543  
1544  PRINTD (DBG_CMD, "command queue at %p", memory);
1545  
1546  {
1547    command * cmd = memory;
1548    amb_cq * cq = &dev->cq;
1549    
1550    cq->pending = 0;
1551    cq->high = 0;
1552    cq->maximum = cmds - 1;
1553    
1554    cq->ptrs.start = cmd;
1555    cq->ptrs.in = cmd;
1556    cq->ptrs.out = cmd;
1557    cq->ptrs.limit = cmd + cmds;
1558    
1559    memory = cq->ptrs.limit;
1560  }
1561  
1562  PRINTD (DBG_TX, "TX queue pair at %p", memory);
1563  
1564  {
1565    tx_in * in = memory;
1566    tx_out * out;
1567    amb_txq * txq = &dev->txq;
1568    
1569    txq->pending = 0;
1570    txq->high = 0;
1571    txq->filled = 0;
1572    txq->maximum = txs - 1;
1573    
1574    txq->in.start = in;
1575    txq->in.ptr = in;
1576    txq->in.limit = in + txs;
1577    
1578    memory = txq->in.limit;
1579    out = memory;
1580    
1581    txq->out.start = out;
1582    txq->out.ptr = out;
1583    txq->out.limit = out + txs;
1584    
1585    memory = txq->out.limit;
1586  }
1587  
1588  PRINTD (DBG_RX, "RX queue pairs at %p", memory);
1589  
1590  for (pool = 0; pool < NUM_RX_POOLS; ++pool) {
1591    rx_in * in = memory;
1592    rx_out * out;
1593    amb_rxq * rxq = &dev->rxq[pool];
1594    
1595    rxq->buffer_size = rx_buffer_sizes[pool];
1596    rxq->buffers_wanted = 0;
1597    
1598    rxq->pending = 0;
1599    rxq->low = rxs[pool] - 1;
1600    rxq->emptied = 0;
1601    rxq->maximum = rxs[pool] - 1;
1602    
1603    rxq->in.start = in;
1604    rxq->in.ptr = in;
1605    rxq->in.limit = in + rxs[pool];
1606    
1607    memory = rxq->in.limit;
1608    out = memory;
1609    
1610    rxq->out.start = out;
1611    rxq->out.ptr = out;
1612    rxq->out.limit = out + rxs[pool];
1613    
1614    memory = rxq->out.limit;
1615  }
1616  
1617  if (memory == limit) {
1618    return 0;
1619  } else {
1620    PRINTK (KERN_ERR, "bad queue alloc %p != %p (tell maintainer)", memory, limit);
1621    kfree (limit - total);
1622    return -ENOMEM;
1623  }
1624  
1625}
1626
1627/********** destruction of communication queues **********/
1628
1629static void destroy_queues (amb_dev * dev) {
1630  // all queues assumed empty
1631  void * memory = dev->cq.ptrs.start;
1632  // includes txq.in, txq.out, rxq[].in and rxq[].out
1633  
1634  PRINTD (DBG_FLOW, "destroy_queues %p", dev);
1635  
1636  PRINTD (DBG_INIT, "freeing queues at %p", memory);
1637  kfree (memory);
1638  
1639  return;
1640}
1641
1642/********** basic loader commands and error handling **********/
1643// centisecond timeouts - guessing away here
1644static unsigned int command_timeouts [] = {
1645        [host_memory_test]     = 15,
1646        [read_adapter_memory]  = 2,
1647        [write_adapter_memory] = 2,
1648        [adapter_start]        = 50,
1649        [get_version_number]   = 10,
1650        [interrupt_host]       = 1,
1651        [flash_erase_sector]   = 1,
1652        [adap_download_block]  = 1,
1653        [adap_erase_flash]     = 1,
1654        [adap_run_in_iram]     = 1,
1655        [adap_end_download]    = 1
1656};
1657
1658
1659static unsigned int command_successes [] = {
1660        [host_memory_test]     = COMMAND_PASSED_TEST,
1661        [read_adapter_memory]  = COMMAND_READ_DATA_OK,
1662        [write_adapter_memory] = COMMAND_WRITE_DATA_OK,
1663        [adapter_start]        = COMMAND_COMPLETE,
1664        [get_version_number]   = COMMAND_COMPLETE,
1665        [interrupt_host]       = COMMAND_COMPLETE,
1666        [flash_erase_sector]   = COMMAND_COMPLETE,
1667        [adap_download_block]  = COMMAND_COMPLETE,
1668        [adap_erase_flash]     = COMMAND_COMPLETE,
1669        [adap_run_in_iram]     = COMMAND_COMPLETE,
1670        [adap_end_download]    = COMMAND_COMPLETE
1671};
1672  
1673static  int decode_loader_result (loader_command cmd, u32 result)
1674{
1675        int res;
1676        const char *msg;
1677
1678        if (result == command_successes[cmd])
1679                return 0;
1680
1681        switch (result) {
1682                case BAD_COMMAND:
1683                        res = -EINVAL;
1684                        msg = "bad command";
1685                        break;
1686                case COMMAND_IN_PROGRESS:
1687                        res = -ETIMEDOUT;
1688                        msg = "command in progress";
1689                        break;
1690                case COMMAND_PASSED_TEST:
1691                        res = 0;
1692                        msg = "command passed test";
1693                        break;
1694                case COMMAND_FAILED_TEST:
1695                        res = -EIO;
1696                        msg = "command failed test";
1697                        break;
1698                case COMMAND_READ_DATA_OK:
1699                        res = 0;
1700                        msg = "command read data ok";
1701                        break;
1702                case COMMAND_READ_BAD_ADDRESS:
1703                        res = -EINVAL;
1704                        msg = "command read bad address";
1705                        break;
1706                case COMMAND_WRITE_DATA_OK:
1707                        res = 0;
1708                        msg = "command write data ok";
1709                        break;
1710                case COMMAND_WRITE_BAD_ADDRESS:
1711                        res = -EINVAL;
1712                        msg = "command write bad address";
1713                        break;
1714                case COMMAND_WRITE_FLASH_FAILURE:
1715                        res = -EIO;
1716                        msg = "command write flash failure";
1717                        break;
1718                case COMMAND_COMPLETE:
1719                        res = 0;
1720                        msg = "command complete";
1721                        break;
1722                case COMMAND_FLASH_ERASE_FAILURE:
1723                        res = -EIO;
1724                        msg = "command flash erase failure";
1725                        break;
1726                case COMMAND_WRITE_BAD_DATA:
1727                        res = -EINVAL;
1728                        msg = "command write bad data";
1729                        break;
1730                default:
1731                        res = -EINVAL;
1732                        msg = "unknown error";
1733                        PRINTD (DBG_LOAD|DBG_ERR,
1734                                "decode_loader_result got %u=%x !",
1735                                result, result);
1736                        break;
1737        }
1738
1739        PRINTK (KERN_ERR, "%s", msg);
1740        return res;
1741}
1742
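    /* Issue one loader command and wait for its result.  The block's bus
       address (masked to suit the 1 Gb PLX window that fixup_plx_window()
       has already pointed at it) is written to the doorbell, then the result
       field is polled until the adapter reports something other than
       COMMAND_IN_PROGRESS.  The timeouts above are centiseconds; they are
       converted to milliseconds below. */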
1743static int __devinit do_loader_command (volatile loader_block * lb,
1744                                     const amb_dev * dev, loader_command cmd) {
1745  
1746  unsigned long timeout;
1747  
1748  PRINTD (DBG_FLOW|DBG_LOAD, "do_loader_command");
1749  
1750  /* do a command
1751     
1752     Set the return value to zero, set the command type and set the
1753     valid entry to the right magic value. The payload is already
1754     correctly byte-ordered so we leave it alone. Hit the doorbell
1755     with the bus address of this structure.
1756     
1757  */
1758  
1759  lb->result = 0;
1760  lb->command = cpu_to_be32 (cmd);
1761  lb->valid = cpu_to_be32 (DMA_VALID);
1762  // dump_registers (dev);
1763  // dump_loader_block (lb);
1764  wr_mem (dev, offsetof(amb_mem, doorbell), virt_to_bus (lb) & ~onegigmask);
1765  
1766  timeout = command_timeouts[cmd] * 10;
1767  
1768  while (!lb->result || lb->result == cpu_to_be32 (COMMAND_IN_PROGRESS))
1769    if (timeout) {
1770      timeout = msleep_interruptible(timeout);
1771    } else {
1772      PRINTD (DBG_LOAD|DBG_ERR, "command %d timed out", cmd);
1773      dump_registers (dev);
1774      dump_loader_block (lb);
1775      return -ETIMEDOUT;
1776    }
1777  
1778  if (cmd == adapter_start) {
1779    // wait for start command to acknowledge...
1780    timeout = 100;
1781    while (rd_plain (dev, offsetof(amb_mem, doorbell)))
1782      if (timeout) {
1783        timeout = msleep_interruptible(timeout);
1784      } else {
1785        PRINTD (DBG_LOAD|DBG_ERR, "start command did not clear doorbell, res=%08x",
1786                be32_to_cpu (lb->result));
1787        dump_registers (dev);
1788        return -ETIMEDOUT;
1789      }
1790    return 0;
1791  } else {
1792    return decode_loader_result (cmd, be32_to_cpu (lb->result));
1793  }
1794  
1795}
1796
1797/* loader: determine loader version */
1798
1799static int __devinit get_loader_version (loader_block * lb,
1800                                      const amb_dev * dev, u32 * version) {
1801  int res;
1802  
1803  PRINTD (DBG_FLOW|DBG_LOAD, "get_loader_version");
1804  
1805  res = do_loader_command (lb, dev, get_version_number);
1806  if (res)
1807    return res;
1808  if (version)
1809    *version = be32_to_cpu (lb->payload.version);
1810  return 0;
1811}
1812
1813/* loader: write memory data blocks */
1814
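    /* loader_write/loader_verify below each take one ihex record: the
       transfer count is in 32-bit words (record length / 4), and verify
       reads the region back and memcmp()s it against the record data. */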
1815static int __devinit loader_write (loader_block* lb,
1816                                   const amb_dev *dev,
1817                                   const struct ihex_binrec *rec) {
1818  transfer_block * tb = &lb->payload.transfer;
1819  
1820  PRINTD (DBG_FLOW|DBG_LOAD, "loader_write");
1821
1822  tb->address = rec->addr;
1823  tb->count = cpu_to_be32(be16_to_cpu(rec->len) / 4);
1824  memcpy(tb->data, rec->data, be16_to_cpu(rec->len));
1825  return do_loader_command (lb, dev, write_adapter_memory);
1826}
1827
1828/* loader: verify memory data blocks */
1829
1830static int __devinit loader_verify (loader_block * lb,
1831                                    const amb_dev *dev,
1832                                    const struct ihex_binrec *rec) {
1833  transfer_block * tb = &lb->payload.transfer;
1834  int res;
1835  
1836  PRINTD (DBG_FLOW|DBG_LOAD, "loader_verify");
1837  
1838  tb->address = rec->addr;
1839  tb->count = cpu_to_be32(be16_to_cpu(rec->len) / 4);
1840  res = do_loader_command (lb, dev, read_adapter_memory);
1841  if (!res && memcmp(tb->data, rec->data, be16_to_cpu(rec->len)))
1842    res = -EINVAL;
1843  return res;
1844}
1845
1846/* loader: start microcode */
1847
1848static int __devinit loader_start (loader_block * lb,
1849                                const amb_dev * dev, u32 address) {
1850  PRINTD (DBG_FLOW|DBG_LOAD, "loader_start");
1851  
1852  lb->payload.start = cpu_to_be32 (address);
1853  return do_loader_command (lb, dev, adapter_start);
1854}
1855
1856/********** reset card **********/
1857
1858static inline void sf (const char * msg)
1859{
1860        PRINTK (KERN_ERR, "self-test failed: %s", msg);
1861}
1862
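    /* Reset the card: assert the reset bits, let things settle, restore the
       doorbell/interrupt registers to a known state, clear the self-test
       "ready" flag and release reset.  With diags set, wait for the self-test
       to finish and report any failure bits left in the result word. */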
1863static int amb_reset (amb_dev * dev, int diags) {
1864  u32 word;
1865  
1866  PRINTD (DBG_FLOW|DBG_LOAD, "amb_reset");
1867  
1868  word = rd_plain (dev, offsetof(amb_mem, reset_control));
1869  // put card into reset state
1870  wr_plain (dev, offsetof(amb_mem, reset_control), word | AMB_RESET_BITS);
1871  // wait a short while
1872  udelay (10);
1873#if 1
1874  // put card into known good state
1875  wr_plain (dev, offsetof(amb_mem, interrupt_control), AMB_DOORBELL_BITS);
1876  // clear all interrupts just in case
1877  wr_plain (dev, offsetof(amb_mem, interrupt), -1);
1878#endif
1879  // clear self-test done flag
1880  wr_plain (dev, offsetof(amb_mem, mb.loader.ready), 0);
1881  // take card out of reset state
1882  wr_plain (dev, offsetof(amb_mem, reset_control), word & ~AMB_RESET_BITS);
1883  
1884  if (diags) { 
1885    unsigned long timeout;
1886    // 4.2 second wait
1887    msleep(4200);
1888    // half second time-out
1889    timeout = 500;
1890    while (!rd_plain (dev, offsetof(amb_mem, mb.loader.ready)))
1891      if (timeout) {
1892        timeout = msleep_interruptible(timeout);
1893      } else {
1894        PRINTD (DBG_LOAD|DBG_ERR, "reset timed out");
1895        return -ETIMEDOUT;
1896      }
1897    
1898    // get results of self-test
1899    // XXX double check byte-order
1900    word = rd_mem (dev, offsetof(amb_mem, mb.loader.result));
1901    if (word & SELF_TEST_FAILURE) {
1902      if (word & GPINT_TST_FAILURE)
1903        sf ("interrupt");
1904      if (word & SUNI_DATA_PATTERN_FAILURE)
1905        sf ("SUNI data pattern");
1906      if (word & SUNI_DATA_BITS_FAILURE)
1907        sf ("SUNI data bits");
1908      if (word & SUNI_UTOPIA_FAILURE)
1909        sf ("SUNI UTOPIA interface");
1910      if (word & SUNI_FIFO_FAILURE)
1911        sf ("SUNI cell buffer FIFO");
1912      if (word & SRAM_FAILURE)
1913        sf ("bad SRAM");
1914      // better return value?
1915      return -EIO;
1916    }
1917    
1918  }
1919  return 0;
1920}
1921
1922/********** transfer and start the microcode **********/
1923
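    /* The microcode image (atmsar11.fw) is a series of ihex records.  The
       first record carries only the start address; each later record is
       written to adapter memory and read back for verification, and finally
       the loader is told to start execution at the start address. */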
1924static int __devinit ucode_init (loader_block * lb, amb_dev * dev) {
1925  const struct firmware *fw;
1926  unsigned long start_address;
1927  const struct ihex_binrec *rec;
1928  int res;
1929  
1930  res = request_ihex_firmware(&fw, "atmsar11.fw", &dev->pci_dev->dev);
1931  if (res) {
1932    PRINTK (KERN_ERR, "Cannot load microcode data");
1933    return res;
1934  }
1935
1936  /* First record contains just the start address */
1937  rec = (const struct ihex_binrec *)fw->data;
1938  if (be16_to_cpu(rec->len) != sizeof(__be32) || be32_to_cpu(rec->addr)) {
1939    PRINTK (KERN_ERR, "Bad microcode data (no start record)");
        release_firmware(fw);
1940    return -EINVAL;
1941  }
1942  start_address = be32_to_cpup((__be32 *)rec->data);
1943
1944  rec = ihex_next_binrec(rec);
1945
1946  PRINTD (DBG_FLOW|DBG_LOAD, "ucode_init");
1947
1948  while (rec) {
1949    PRINTD (DBG_LOAD, "starting region (%x, %u)", be32_to_cpu(rec->addr),
1950            be16_to_cpu(rec->len));
1951    if (be16_to_cpu(rec->len) > 4 * MAX_TRANSFER_DATA) {
1952            PRINTK (KERN_ERR, "Bad microcode data (record too long)");
1953            res = -EINVAL;
                break;
1954    }
1955    if (be16_to_cpu(rec->len) & 3) {
1956            PRINTK (KERN_ERR, "Bad microcode data (odd number of bytes)");
1957            res = -EINVAL;
                break;
1958    }
1959    res = loader_write(lb, dev, rec);
1960    if (res)
1961      break;
1962
1963    res = loader_verify(lb, dev, rec);
1964    if (res)
1965      break;
        rec = ihex_next_binrec(rec);
1966  }
1967  release_firmware(fw);
1968  if (!res)
1969    res = loader_start(lb, dev, start_address);
1970
1971  return res;
1972}
1973
1974/********** give adapter parameters **********/
1975  
1976static inline __be32 bus_addr(void * addr) {
1977    return cpu_to_be32 (virt_to_bus (addr));
1978}
1979
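    /* Hand the host-side queue layout to the now-running microcode: an
       adap_talk_block holding the bus addresses of the command, TX and RX
       queues is built on the stack and its address written to the doorbell.
       The adapter then runs a DMA test lasting about two seconds, during
       which the doorbell must be left alone. */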
1980static int __devinit amb_talk (amb_dev * dev) {
1981  adap_talk_block a;
1982  unsigned char pool;
1983  unsigned long timeout;
1984  
1985  PRINTD (DBG_FLOW, "amb_talk %p", dev);
1986  
1987  a.command_start = bus_addr (dev->cq.ptrs.start);
1988  a.command_end   = bus_addr (dev->cq.ptrs.limit);
1989  a.tx_start      = bus_addr (dev->txq.in.start);
1990  a.tx_end        = bus_addr (dev->txq.in.limit);
1991  a.txcom_start   = bus_addr (dev->txq.out.start);
1992  a.txcom_end     = bus_addr (dev->txq.out.limit);
1993  
1994  for (pool = 0; pool < NUM_RX_POOLS; ++pool) {
1995    // the other "a" items are set up by the adapter
1996    a.rec_struct[pool].buffer_start = bus_addr (dev->rxq[pool].in.start);
1997    a.rec_struct[pool].buffer_end   = bus_addr (dev->rxq[pool].in.limit);
1998    a.rec_struct[pool].rx_start     = bus_addr (dev->rxq[pool].out.start);
1999    a.rec_struct[pool].rx_end       = bus_addr (dev->rxq[pool].out.limit);
2000    a.rec_struct[pool].buffer_size = cpu_to_be32 (dev->rxq[pool].buffer_size);
2001  }
2002  
2003#ifdef AMB_NEW_MICROCODE
2004  // disable fast PLX prefetching
2005  a.init_flags = 0;
2006#endif
2007  
2008  // pass the structure
2009  wr_mem (dev, offsetof(amb_mem, doorbell), virt_to_bus (&a));
2010  
2011  // 2.2 second wait (must not touch doorbell during 2 second DMA test)
2012  msleep(2200);
2013  // give the adapter another half second?
2014  timeout = 500;
2015  while (rd_plain (dev, offsetof(amb_mem, doorbell)))
2016    if (timeout) {
2017      timeout = msleep_interruptible(timeout);
2018    } else {
2019      PRINTD (DBG_INIT|DBG_ERR, "adapter init timed out");
2020      return -ETIMEDOUT;
2021    }
2022  
2023  return 0;
2024}
2025
2026// get microcode version
2027static void __devinit amb_ucode_version (amb_dev * dev) {
2028  u32 major;
2029  u32 minor;
2030  command cmd;
2031  cmd.request = cpu_to_be32 (SRB_GET_VERSION);
2032  while (command_do (dev, &cmd)) {
2033    set_current_state(TASK_UNINTERRUPTIBLE);
2034    schedule();
2035  }
2036  major = be32_to_cpu (cmd.args.version.major);
2037  minor = be32_to_cpu (cmd.args.version.minor);
2038  PRINTK (KERN_INFO, "microcode version is %u.%u", major, minor);
2039}
2040  
2041// get end station address
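    // the ESI is built by bit-reversing each byte of the BIA returned by the microcode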
2042static void __devinit amb_esi (amb_dev * dev, u8 * esi) {
2043  u32 lower4;
2044  u16 upper2;
2045  command cmd;
2046  
2047  cmd.request = cpu_to_be32 (SRB_GET_BIA);
2048  while (command_do (dev, &cmd)) {
2049    set_current_state(TASK_UNINTERRUPTIBLE);
2050    schedule();
2051  }
2052  lower4 = be32_to_cpu (cmd.args.bia.lower4);
2053  upper2 = be32_to_cpu (cmd.args.bia.upper2);
2054  PRINTD (DBG_LOAD, "BIA: lower4: %08x, upper2 %04x", lower4, upper2);
2055  
2056  if (esi) {
2057    unsigned int i;
2058    
2059    PRINTDB (DBG_INIT, "ESI:");
2060    for (i = 0; i < ESI_LEN; ++i) {
2061      if (i < 4)
2062          esi[i] = bitrev8(lower4>>(8*i));
2063      else
2064          esi[i] = bitrev8(upper2>>(8*(i-4)));
2065      PRINTDM (DBG_INIT, " %02x", esi[i]);
2066    }
2067    
2068    PRINTDE (DBG_INIT, "");
2069  }
2070  
2071  return;
2072}
2073  
2074static void fixup_plx_window (amb_dev *dev, loader_block *lb)
2075{
2076        // fix up the PLX-mapped window base address to match the block
2077        unsigned long blb;
2078        u32 mapreg;
2079        blb = virt_to_bus(lb);
2080        // the kernel stack had better not ever cross a 1Gb boundary!
2081        mapreg = rd_plain (dev, offsetof(amb_mem, stuff[10]));
2082        mapreg &= ~onegigmask;
2083        mapreg |= blb & onegigmask;
2084        wr_plain (dev, offsetof(amb_mem, stuff[10]), mapreg);
2085        return;
2086}
2087
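    /* Bring the card up: reset with full diagnostics, point the PLX window at
       the on-stack loader block, check the loader version, download and start
       the microcode, allocate the host queues and hand them to the adapter,
       then report the microcode version.  Failures unwind whatever was done. */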
2088static int __devinit amb_init (amb_dev * dev)
2089{
2090  loader_block lb;
2091  
2092  u32 version;
2093  
2094  if (amb_reset (dev, 1)) {
2095    PRINTK (KERN_ERR, "card reset failed!");
2096  } else {
2097    fixup_plx_window (dev, &lb);
2098    
2099    if (get_loader_version (&lb, dev, &version)) {
2100      PRINTK (KERN_INFO, "failed to get loader version");
2101    } else {
2102      PRINTK (KERN_INFO, "loader version is %08x", version);
2103      
2104      if (ucode_init (&lb, dev)) {
2105        PRINTK (KERN_ERR, "microcode failure");
2106      } else if (create_queues (dev, cmds, txs, rxs, rxs_bs)) {
2107        PRINTK (KERN_ERR, "failed to get memory for queues");
2108      } else {
2109        
2110        if (amb_talk (dev)) {
2111          PRINTK (KERN_ERR, "adapter did not accept queues");
2112        } else {
2113          
2114          amb_ucode_version (dev);
2115          return 0;
2116          
2117        } /* amb_talk */
2118        
2119        destroy_queues (dev);
2120      } /* create_queues, ucode_init */
2121      
2122      amb_reset (dev, 0);
2123    } /* get_loader_version */
2124    
2125  } /* amb_reset */
2126  
2127  return -EINVAL;
2128}
2129
2130static void setup_dev(amb_dev *dev, struct pci_dev *pci_dev) 
2131{
2132      unsigned char pool;
2133      
2134      // set up known dev items straight away
2135      dev->pci_dev = pci_dev; 
2136      pci_set_drvdata(pci_dev, dev);
2137      
2138      dev->iobase = pci_resource_start (pci_dev, 1);
2139      dev->irq = pci_dev->irq; 
2140      dev->membase = bus_to_virt(pci_resource_start(pci_dev, 0));
2141      
2142      // flags (currently only dead)
2143      dev->flags = 0;
2144      
2145      // Allocate cell rates (fibre)
2146      // ATM_OC3_PCR = 155520000/8/270*260/53 - 29/53
2147      // to be really pedantic, this should be ATM_OC3c_PCR
2148      dev->tx_avail = ATM_OC3_PCR;
2149      dev->rx_avail = ATM_OC3_PCR;
2150      
2151#ifdef FILL_RX_POOLS_IN_BH
2152      // initialise bottom half
2153      INIT_WORK(&dev->bh, (void (*)(void *)) fill_rx_pools, dev);
2154#endif
2155      
2156      // semaphore for txer/rxer modifications - we cannot use a
2157      // spinlock as the critical region needs to switch processes
2158      mutex_init(&dev->vcc_sf);
2159      // queue manipulation spinlocks; we want atomic reads and
2160      // writes to the queue descriptors (handles IRQ and SMP)
2161      // consider replacing "int pending" -> "atomic_t available"
2162      // => problem related to who gets to move queue pointers
2163      spin_lock_init (&dev->cq.lock);
2164      spin_lock_init (&dev->txq.lock);
2165      for (pool = 0; pool < NUM_RX_POOLS; ++pool)
2166        spin_lock_init (&dev->rxq[pool].lock);
2167}
2168
2169static void setup_pci_dev(struct pci_dev *pci_dev)
2170{
2171        unsigned char lat;
2172      
2173        // enable bus master accesses
2174        pci_set_master(pci_dev);
2175
2176        // frobnicate latency (upwards, usually)
2177        pci_read_config_byte (pci_dev, PCI_LATENCY_TIMER, &lat);
2178
2179        if (!pci_lat)
2180                pci_lat = (lat < MIN_PCI_LATENCY) ? MIN_PCI_LATENCY : lat;
2181
2182        if (lat != pci_lat) {
2183                PRINTK (KERN_INFO, "Changing PCI latency timer from %hu to %hu",
2184                        lat, pci_lat);
2185                pci_write_config_byte(pci_dev, PCI_LATENCY_TIMER, pci_lat);
2186        }
2187}
2188
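    /* PCI probe: enable the device, reject the broken PLX rev 2 variant, claim
       the I/O region, allocate and initialise the driver state, grab the
       (shared) IRQ, register the ATM device, read the ESI and start the
       housekeeping timer.  The out_* labels below undo these steps in reverse
       order on failure. */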
2189static int __devinit amb_probe(struct pci_dev *pci_dev, const struct pci_device_id *pci_ent)
2190{
2191        amb_dev * dev;
2192        int err;
2193        unsigned int irq;
2194      
2195        err = pci_enable_device(pci_dev);
2196        if (err < 0) {
2197                PRINTK (KERN_ERR, "cannot enable PCI device");
2198                goto out;
2199        }
2200
2201        // read resources from PCI configuration space
2202        irq = pci_dev->irq;
2203
2204        if (pci_dev->device == PCI_DEVICE_ID_MADGE_AMBASSADOR_BAD) {
2205                PRINTK (KERN_ERR, "skipped broken (PLX rev 2) card");
2206                err = -EINVAL;
2207                goto out_disable;
2208        }
2209
2210        PRINTD (DBG_INFO, "found Madge ATM adapter (amb) at"
2211                " IO %llx, IRQ %u, MEM %p",
2212                (unsigned long long)pci_resource_start(pci_dev, 1),
2213                irq, bus_to_virt(pci_resource_start(pci_dev, 0)));
2214
2215        // check IO region
2216        err = pci_request_region(pci_dev, 1, DEV_LABEL);
2217        if (err < 0) {
2218                PRINTK (KERN_ERR, "IO range already in use!");
2219                goto out_disable;
2220        }
2221
2222        dev = kzalloc(sizeof(amb_dev), GFP_KERNEL);
2223        if (!dev) {
2224                PRINTK (KERN_ERR, "out of memory!");
2225                err = -ENOMEM;
2226                goto out_release;
2227        }
2228
2229        setup_dev(dev, pci_dev);
2230
2231        err = amb_init(dev);
2232        if (err < 0) {
2233                PRINTK (KERN_ERR, "adapter initialisation failure");
2234                goto out_free;
2235        }
2236
2237        setup_pci_dev(pci_dev);
2238
2239        // grab (but share) IRQ and install handler
2240        err = request_irq(irq, interrupt_handler, IRQF_SHARED, DEV_LABEL, dev);
2241        if (err < 0) {
2242                PRINTK (KERN_ERR, "request IRQ failed!");
2243                goto out_reset;
2244        }
2245
2246        dev->atm_dev = atm_dev_register (DEV_LABEL, &amb_ops, -1, NULL);
2247        if (!dev->atm_dev) {
2248                PRINTD (DBG_ERR, "failed to register Madge ATM adapter");
2249                err = -EINVAL;
2250                goto out_free_irq;
2251        }
2252
2253        PRINTD (DBG_INFO, "registered Madge ATM adapter (no. %d) (%p) at %p",
2254                dev->atm_dev->number, dev, dev->atm_dev);
2255        dev->atm_dev->dev_data = (void *) dev;
2256
2257        // register our address
2258        amb_esi (dev, dev->atm_dev->esi);
2259
2260        // 0 bits for vpi, 10 bits for vci
2261        dev->atm_dev->ci_range.vpi_bits = NUM_VPI_BITS;
2262        dev->atm_dev->ci_range.vci_bits = NUM_VCI_BITS;
2263
2264        init_timer(&dev->housekeeping);
2265        dev->housekeeping.function = do_housekeeping;
2266        dev->housekeeping.data = (unsigned long) dev;
2267        mod_timer(&dev->housekeeping, jiffies);
2268
2269        // enable host interrupts
2270        interrupts_on (dev);
2271
2272out:
2273        return err;
2274
2275out_free_irq:
2276        free_irq(irq, dev);
2277out_reset:
2278        amb_reset(dev, 0);
2279out_free:
2280        kfree(dev);
2281out_release:
2282        pci_release_region(pci_dev, 1);
2283out_disable:
2284        pci_disable_device(pci_dev);
2285        goto out;
2286}
2287
2288
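    /* Device removal: stop housekeeping, drain the RX pools, mask interrupts
       and reset the card before freeing the IRQ, queues and ATM device. */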
2289static void __devexit amb_remove_one(struct pci_dev *pci_dev)
2290{
2291        struct amb_dev *dev;
2292
2293        dev = pci_get_drvdata(pci_dev);
2294
2295        PRINTD(DBG_INFO|DBG_INIT, "closing %p (atm_dev = %p)", dev, dev->atm_dev);
2296        del_timer_sync(&dev->housekeeping);
2297        // the drain should not be necessary
2298        drain_rx_pools(dev);
2299        interrupts_off(dev);
2300        amb_reset(dev, 0);
2301        free_irq(dev->irq, dev);
2302        pci_disable_device(pci_dev);
2303        destroy_queues(dev);
2304        atm_dev_deregister(dev->atm_dev);
2305        kfree(dev);
2306        pci_release_region(pci_dev, 1);
2307}
2308
2309static void __init amb_check_args (void) {
2310  unsigned char pool;
2311  unsigned int max_rx_size;
2312  
2313#ifdef DEBUG_AMBASSADOR
2314  PRINTK (KERN_NOTICE, "debug bitmap is %hx", debug &= DBG_MASK);
2315#else
2316  if (debug)
2317    PRINTK (KERN_NOTICE, "no debugging support");
2318#endif
2319  
2320  if (cmds < MIN_QUEUE_SIZE)
2321    PRINTK (KERN_NOTICE, "cmds has been raised to %u",
2322            cmds = MIN_QUEUE_SIZE);
2323  
2324  if (txs < MIN_QUEUE_SIZE)
2325    PRINTK (KERN_NOTICE, "txs has been raised to %u",
2326            txs = MIN_QUEUE_SIZE);
2327  
2328  for (pool = 0; pool < NUM_RX_POOLS; ++pool)
2329    if (rxs[pool] < MIN_QUEUE_SIZE)
2330      PRINTK (KERN_NOTICE, "rxs[%hu] has been raised to %u",
2331              pool, rxs[pool] = MIN_QUEUE_SIZE);
2332  
2333  // buffers sizes should be greater than zero and strictly increasing
2334  max_rx_size = 0;
2335  for (pool = 0; pool < NUM_RX_POOLS; ++pool)
2336    if (rxs_bs[pool] <= max_rx_size)
2337      PRINTK (KERN_NOTICE, "useless pool (rxs_bs[%hu] = %u)",
2338              pool, rxs_bs[pool]);
2339    else
2340      max_rx_size = rxs_bs[pool];
2341  
2342  if (rx_lats < MIN_RX_BUFFERS)
2343    PRINTK (KERN_NOTICE, "rx_lats has been raised to %u",
2344            rx_lats = MIN_RX_BUFFERS);
2345  
2346  return;
2347}
2348
2349/********** module stuff **********/
2350
2351MODULE_AUTHOR(maintainer_string);
2352MODULE_DESCRIPTION(description_string);
2353MODULE_LICENSE("GPL");
2354module_param(debug,   ushort, 0644);
2355module_param(cmds,    uint, 0);
2356module_param(txs,     uint, 0);
2357module_param_array(rxs,     uint, NULL, 0);
2358module_param_array(rxs_bs,  uint, NULL, 0);
2359module_param(rx_lats, uint, 0);
2360module_param(pci_lat, byte, 0);
2361MODULE_PARM_DESC(debug,   "debug bitmap, see .h file");
2362MODULE_PARM_DESC(cmds,    "number of command queue entries");
2363MODULE_PARM_DESC(txs,     "number of TX queue entries");
2364MODULE_PARM_DESC(rxs,     "number of RX queue entries [" __MODULE_STRING(NUM_RX_POOLS) "]");
2365MODULE_PARM_DESC(rxs_bs,  "size of RX buffers [" __MODULE_STRING(NUM_RX_POOLS) "]");
2366MODULE_PARM_DESC(rx_lats, "number of extra buffers to cope with RX latencies");
2367MODULE_PARM_DESC(pci_lat, "PCI latency in bus cycles");
2368
2369/********** module entry **********/
2370
2371static struct pci_device_id amb_pci_tbl[] = {
2372        { PCI_VENDOR_ID_MADGE, PCI_DEVICE_ID_MADGE_AMBASSADOR, PCI_ANY_ID, PCI_ANY_ID,
2373          0, 0, 0 },
2374        { PCI_VENDOR_ID_MADGE, PCI_DEVICE_ID_MADGE_AMBASSADOR_BAD, PCI_ANY_ID, PCI_ANY_ID,
2375          0, 0, 0 },
2376        { 0, }
2377};
2378
2379MODULE_DEVICE_TABLE(pci, amb_pci_tbl);
2380
2381static struct pci_driver amb_driver = {
2382        .name =         "amb",
2383        .probe =        amb_probe,
2384        .remove =       __devexit_p(amb_remove_one),
2385        .id_table =     amb_pci_tbl,
2386};
2387
2388static int __init amb_module_init (void)
2389{
2390  PRINTD (DBG_FLOW|DBG_INIT, "init_module");
2391  
2392  // sanity check - cast needed as printk does not support %Zu
2393  if (sizeof(amb_mem) != 4*16 + 4*12) {
2394    PRINTK (KERN_ERR, "Fix amb_mem (is %lu bytes).",
2395            (unsigned long) sizeof(amb_mem));
2396    return -ENOMEM;
2397  }
2398  
2399  show_version();
2400  
2401  amb_check_args();
2402  
2403  // get the juice
2404  return pci_register_driver(&amb_driver);
2405}
2406
2407/********** module exit **********/
2408
2409static void __exit amb_module_exit (void)
2410{
2411  PRINTD (DBG_FLOW|DBG_INIT, "cleanup_module");
2412
2413  pci_unregister_driver(&amb_driver);
2414}
2415
2416module_init(amb_module_init);
2417module_exit(amb_module_exit);
2418