linux/drivers/atm/ambassador.c
<<
>>
Prefs
   1/*
   2  Madge Ambassador ATM Adapter driver.
   3  Copyright (C) 1995-1999  Madge Networks Ltd.
   4
   5  This program is free software; you can redistribute it and/or modify
   6  it under the terms of the GNU General Public License as published by
   7  the Free Software Foundation; either version 2 of the License, or
   8  (at your option) any later version.
   9
  10  This program is distributed in the hope that it will be useful,
  11  but WITHOUT ANY WARRANTY; without even the implied warranty of
  12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13  GNU General Public License for more details.
  14
  15  You should have received a copy of the GNU General Public License
  16  along with this program; if not, write to the Free Software
  17  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  18
  19  The GNU GPL is contained in /usr/doc/copyright/GPL on a Debian
  20  system and in the file COPYING in the Linux kernel source.
  21*/
  22
  23/* * dedicated to the memory of Graham Gordon 1971-1998 * */
  24
  25#include <linux/module.h>
  26#include <linux/types.h>
  27#include <linux/pci.h>
  28#include <linux/kernel.h>
  29#include <linux/init.h>
  30#include <linux/ioport.h>
  31#include <linux/atmdev.h>
  32#include <linux/delay.h>
  33#include <linux/interrupt.h>
  34#include <linux/poison.h>
  35#include <linux/bitrev.h>
  36#include <linux/mutex.h>
  37#include <linux/firmware.h>
  38#include <linux/ihex.h>
  39#include <linux/slab.h>
  40
  41#include <linux/atomic.h>
  42#include <asm/io.h>
  43#include <asm/byteorder.h>
  44
  45#include "ambassador.h"
  46
  47#define maintainer_string "Giuliano Procida at Madge Networks <gprocida@madge.com>"
  48#define description_string "Madge ATM Ambassador driver"
  49#define version_string "1.2.4"
  50
  51static inline void __init show_version (void) {
  52  printk ("%s version %s\n", description_string, version_string);
  53}
  54
  55/*
  56  
  57  Theory of Operation
  58  
  59  I Hardware, detection, initialisation and shutdown.
  60  
  61  1. Supported Hardware
  62  
  63  This driver is for the PCI ATMizer-based Ambassador card (except
  64  very early versions). It is not suitable for the similar EISA "TR7"
  65  card. Commercially, both cards are known as Collage Server ATM
  66  adapters.
  67  
  68  The loader supports image transfer to the card, image start and a few
  69  other miscellaneous commands.
  70  
  71  Only AAL5 is supported with vpi = 0 and vci in the range 0 to 1023.
  72  
  73  The cards are big-endian.
  74  
  75  2. Detection
  76  
  77  Standard PCI stuff, the early cards are detected and rejected.
  78  
  79  3. Initialisation
  80  
  81  The cards are reset and the self-test results are checked. The
  82  microcode image is then transferred and started. This waits for a
  83  pointer to a descriptor containing details of the host-based queues
  84  and buffers and various parameters etc. Once they are processed
  85  normal operations may begin. The BIA is read using a microcode
  86  command.
  87  
  88  4. Shutdown
  89  
  90  This may be accomplished either by a card reset or via the microcode
  91  shutdown command. Further investigation required.
  92  
  93  5. Persistent state
  94  
  95  The card reset does not affect PCI configuration (good) or the
  96  contents of several other "shared run-time registers" (bad) which
  97  include doorbell and interrupt control as well as EEPROM and PCI
  98  control. The driver must be careful when modifying these registers
  99  not to touch bits it does not use and to undo any changes at exit.
 100  
 101  II Driver software
 102  
 103  0. Generalities
 104  
 105  The adapter is quite intelligent (fast) and has a simple interface
 106  (few features). VPI is always zero, 1024 VCIs are supported. There
 107  is limited cell rate support. UBR channels can be capped and ABR
 108  (explicit rate, but not EFCI) is supported. There is no CBR or VBR
 109  support.
 110  
 111  1. Driver <-> Adapter Communication
 112  
 113  Apart from the basic loader commands, the driver communicates
 114  through three entities: the command queue (CQ), the transmit queue
 115  pair (TXQ) and the receive queue pairs (RXQ). These three entities
 116  are set up by the host and passed to the microcode just after it has
 117  been started.
 118  
 119  All queues are host-based circular queues. They are contiguous and
 120  (due to hardware limitations) have some restrictions as to their
 121  locations in (bus) memory. They are of the "full means the same as
 122  empty so don't do that" variety since the adapter uses pointers
 123  internally.
 124  
 125  The queue pairs work as follows: one queue is for supply to the
 126  adapter, items in it are pending and are owned by the adapter; the
 127  other is the queue for return from the adapter, items in it have
 128  been dealt with by the adapter. The host adds items to the supply
 129  (TX descriptors and free RX buffer descriptors) and removes items
 130  from the return (TX and RX completions). The adapter deals with out
 131  of order completions.
 132  
 133  Interrupts (card to host) and the doorbell (host to card) are used
 134  for signalling.
 135  
 136  1. CQ
 137  
 138  This is to communicate "open VC", "close VC", "get stats" etc. to
 139  the adapter. At most one command is retired every millisecond by the
 140  card. There is no out of order completion or notification. The
 141  driver needs to check the return code of the command, waiting as
 142  appropriate.
 143  
 144  2. TXQ
 145  
 146  TX supply items are of variable length (scatter gather support) and
 147  so the queue items are (more or less) pointers to the real thing.
 148  Each TX supply item contains a unique, host-supplied handle (the skb
 149  bus address seems most sensible as this works for Alphas as well,
 150  there is no need to do any endian conversions on the handles).
 151  
 152  TX return items consist of just the handles above.
 153  
 154  3. RXQ (up to 4 of these with different lengths and buffer sizes)
 155  
 156  RX supply items consist of a unique, host-supplied handle (the skb
 157  bus address again) and a pointer to the buffer data area.
 158  
 159  RX return items consist of the handle above, the VC, length and a
 160  status word. This just screams "oh so easy" doesn't it?
 161
 162  Note on RX pool sizes:
 163   
 164  Each pool should have enough buffers to handle a back-to-back stream
 165  of minimum sized frames on a single VC. For example:
 166  
 167    frame spacing = 3us (about right)
 168    
 169    delay = IRQ lat + RX handling + RX buffer replenish = 20 (us)  (a guess)
 170    
 171    min number of buffers for one VC = 1 + delay/spacing (buffers)
 172
 173    delay/spacing = latency = (20+2)/3 = 7 (buffers)  (rounding up)
 174    
 175  The 20us delay assumes that there is no need to sleep; if we need to
 176  sleep to get buffers we are going to drop frames anyway.
 177  
 178  In fact, each pool should have enough buffers to support the
 179  simultaneous reassembly of a separate frame on each VC and cope with
 180  the case in which frames complete in round robin cell fashion on
 181  each VC.
 182  
 183  Only one frame can complete at each cell arrival, so if "n" VCs are
 184  open, the worst case is to have them all complete frames together
 185  followed by all starting new frames together.
 186  
 187    desired number of buffers = n + delay/spacing
 188    
 189  These are the extreme requirements, however, they are "n+k" for some
 190  "k" so we have only the constant to choose. This is the argument
 191  rx_lats which currently defaults to 7.
 192  
 193  Actually, "n ? n+k : 0" is better and this is what is implemented,
 194  subject to the limit given by the pool size.
 195  
 196  4. Driver locking
 197  
 198  Simple spinlocks are used around the TX and RX queue mechanisms.
 199  Anyone with a faster, working method is welcome to implement it.
 200  
 201  The adapter command queue is protected with a spinlock. We always
 202  wait for commands to complete.
 203  
 204  A more complex form of locking is used around parts of the VC open
 205  and close functions. There are three reasons for a lock: 1. we need
 206  to do atomic rate reservation and release (not used yet), 2. Opening
 207  sometimes involves two adapter commands which must not be separated
 208  by another command on the same VC, 3. the changes to RX pool size
 209  must be atomic. The lock needs to work over context switches, so we
 210  use a semaphore.
 211  
 212  III Hardware Features and Microcode Bugs
 213  
 214  1. Byte Ordering
 215  
 216  *%^"$&%^$*&^"$(%^$#&^%$(&#%$*(&^#%!"!"!*!
 217  
 218  2. Memory access
 219  
 220  All structures that are not accessed using DMA must be 4-byte
 221  aligned (not a problem) and must not cross 4MB boundaries.
 222  
 223  There is a DMA memory hole at E0000000-E00000FF (groan).
 224  
 225  TX fragments (DMA read) must not cross 4MB boundaries (would be 16MB
 226  but for a hardware bug).
 227  
 228  RX buffers (DMA write) must not cross 16MB boundaries and must
 229  include spare trailing bytes up to the next 4-byte boundary; they
 230  will be written with rubbish.
 231  
 232  The PLX likes to prefetch; if reading up to 4 u32 past the end of
 233  each TX fragment is not a problem, then TX can be made to go a
 234  little faster by passing a flag at init that disables a prefetch
 235  workaround. We do not pass this flag. (new microcode only)
 236  
 237  Now we:
 238  . Note that alloc_skb rounds up size to a 16byte boundary.  
 239  . Ensure all areas do not traverse 4MB boundaries.
 240  . Ensure all areas do not start at an E00000xx bus address.
 241  (I cannot be certain, but this may always hold with Linux)
 242  . Make all failures cause a loud message.
 243  . Discard non-conforming SKBs (causes TX failure or RX fill delay).
 244  . Discard non-conforming TX fragment descriptors (the TX fails).
 245  In the future we could:
 246  . Allow RX areas that traverse 4MB (but not 16MB) boundaries.
 247  . Segment TX areas into some/more fragments, when necessary.
 248  . Relax checks for non-DMA items (ignore hole).
 249  . Give scatter-gather (iovec) requirements using ???. (?)
 250  
 251  3. VC close is broken (only for new microcode)
 252  
 253  The VC close adapter microcode command fails to do anything if any
 254  frames have been received on the VC but none have been transmitted.
 255  Frames continue to be reassembled and passed (with IRQ) to the
 256  driver.
 257  
 258  IV To Do List
 259  
 260  . Fix bugs!
 261  
 262  . Timer code may be broken.
 263  
 264  . Deal with buggy VC close (somehow) in microcode 12.
 265  
 266  . Handle interrupted and/or non-blocking writes - is this a job for
 267    the protocol layer?
 268  
 269  . Add code to break up TX fragments when they span 4MB boundaries.
 270  
 271  . Add SUNI phy layer (need to know where SUNI lives on card).
 272  
 273  . Implement a tx_alloc fn to (a) satisfy TX alignment etc. and (b)
 274    leave extra headroom space for Ambassador TX descriptors.
 275  
 276  . Understand these elements of struct atm_vcc: recvq (proto?),
 277    sleep, callback, listenq, backlog_quota, reply and user_back.
 278  
 279  . Adjust TX/RX skb allocation to favour IP with LANE/CLIP (configurable).
 280  
 281  . Impose a TX-pending limit (2?) on each VC, help avoid TX q overflow.
 282  
 283  . Decide whether RX buffer recycling is or can be made completely safe;
 284    turn it back on. It looks like Werner is going to axe this.
 285  
 286  . Implement QoS changes on open VCs (involves extracting parts of VC open
 287    and close into separate functions and using them to make changes).
 288  
 289  . Hack on command queue so that someone can issue multiple commands and wait
 290    on the last one (OR only "no-op" or "wait" commands are waited for).
 291  
 292  . Eliminate need for while-schedule around do_command.
 293  
 294*/
 295
 296static void do_housekeeping (unsigned long arg);
 297/********** globals **********/
 298
 299static unsigned short debug = 0;
 300static unsigned int cmds = 8;
 301static unsigned int txs = 32;
 302static unsigned int rxs[NUM_RX_POOLS] = { 64, 64, 64, 64 };
 303static unsigned int rxs_bs[NUM_RX_POOLS] = { 4080, 12240, 36720, 65535 };
 304static unsigned int rx_lats = 7;
 305static unsigned char pci_lat = 0;
 306
 307static const unsigned long onegigmask = -1 << 30;
 308
 309/********** access to adapter **********/
 310
 311static inline void wr_plain (const amb_dev * dev, size_t addr, u32 data) {
 312  PRINTD (DBG_FLOW|DBG_REGS, "wr: %08zx <- %08x", addr, data);
 313#ifdef AMB_MMIO
 314  dev->membase[addr / sizeof(u32)] = data;
 315#else
 316  outl (data, dev->iobase + addr);
 317#endif
 318}
 319
 320static inline u32 rd_plain (const amb_dev * dev, size_t addr) {
 321#ifdef AMB_MMIO
 322  u32 data = dev->membase[addr / sizeof(u32)];
 323#else
 324  u32 data = inl (dev->iobase + addr);
 325#endif
 326  PRINTD (DBG_FLOW|DBG_REGS, "rd: %08zx -> %08x", addr, data);
 327  return data;
 328}
 329
 330static inline void wr_mem (const amb_dev * dev, size_t addr, u32 data) {
 331  __be32 be = cpu_to_be32 (data);
 332  PRINTD (DBG_FLOW|DBG_REGS, "wr: %08zx <- %08x b[%08x]", addr, data, be);
 333#ifdef AMB_MMIO
 334  dev->membase[addr / sizeof(u32)] = be;
 335#else
 336  outl (be, dev->iobase + addr);
 337#endif
 338}
 339
 340static inline u32 rd_mem (const amb_dev * dev, size_t addr) {
 341#ifdef AMB_MMIO
 342  __be32 be = dev->membase[addr / sizeof(u32)];
 343#else
 344  __be32 be = inl (dev->iobase + addr);
 345#endif
 346  u32 data = be32_to_cpu (be);
 347  PRINTD (DBG_FLOW|DBG_REGS, "rd: %08zx -> %08x b[%08x]", addr, data, be);
 348  return data;
 349}
 350
 351/********** dump routines **********/
 352
 353static inline void dump_registers (const amb_dev * dev) {
 354#ifdef DEBUG_AMBASSADOR
 355  if (debug & DBG_REGS) {
 356    size_t i;
 357    PRINTD (DBG_REGS, "reading PLX control: ");
 358    for (i = 0x00; i < 0x30; i += sizeof(u32))
 359      rd_mem (dev, i);
 360    PRINTD (DBG_REGS, "reading mailboxes: ");
 361    for (i = 0x40; i < 0x60; i += sizeof(u32))
 362      rd_mem (dev, i);
 363    PRINTD (DBG_REGS, "reading doorb irqev irqen reset:");
 364    for (i = 0x60; i < 0x70; i += sizeof(u32))
 365      rd_mem (dev, i);
 366  }
 367#else
 368  (void) dev;
 369#endif
 370  return;
 371}
 372
 373static inline void dump_loader_block (volatile loader_block * lb) {
 374#ifdef DEBUG_AMBASSADOR
 375  unsigned int i;
 376  PRINTDB (DBG_LOAD, "lb @ %p; res: %d, cmd: %d, pay:",
 377           lb, be32_to_cpu (lb->result), be32_to_cpu (lb->command));
 378  for (i = 0; i < MAX_COMMAND_DATA; ++i)
 379    PRINTDM (DBG_LOAD, " %08x", be32_to_cpu (lb->payload.data[i]));
 380  PRINTDE (DBG_LOAD, ", vld: %08x", be32_to_cpu (lb->valid));
 381#else
 382  (void) lb;
 383#endif
 384  return;
 385}
 386
 387static inline void dump_command (command * cmd) {
 388#ifdef DEBUG_AMBASSADOR
 389  unsigned int i;
 390  PRINTDB (DBG_CMD, "cmd @ %p, req: %08x, pars:",
 391           cmd, /*be32_to_cpu*/ (cmd->request));
 392  for (i = 0; i < 3; ++i)
 393    PRINTDM (DBG_CMD, " %08x", /*be32_to_cpu*/ (cmd->args.par[i]));
 394  PRINTDE (DBG_CMD, "");
 395#else
 396  (void) cmd;
 397#endif
 398  return;
 399}
 400
 401static inline void dump_skb (char * prefix, unsigned int vc, struct sk_buff * skb) {
 402#ifdef DEBUG_AMBASSADOR
 403  unsigned int i;
 404  unsigned char * data = skb->data;
 405  PRINTDB (DBG_DATA, "%s(%u) ", prefix, vc);
 406  for (i=0; i<skb->len && i < 256;i++)
 407    PRINTDM (DBG_DATA, "%02x ", data[i]);
 408  PRINTDE (DBG_DATA,"");
 409#else
 410  (void) prefix;
 411  (void) vc;
 412  (void) skb;
 413#endif
 414  return;
 415}
 416
 417/********** check memory areas for use by Ambassador **********/
 418
 419/* see limitations under Hardware Features */
 420
 421static int check_area (void * start, size_t length) {
 422  // assumes length > 0
 423  const u32 fourmegmask = -1 << 22;
 424  const u32 twofivesixmask = -1 << 8;
 425  const u32 starthole = 0xE0000000;
 426  u32 startaddress = virt_to_bus (start);
 427  u32 lastaddress = startaddress+length-1;
 428  if ((startaddress ^ lastaddress) & fourmegmask ||
 429      (startaddress & twofivesixmask) == starthole) {
 430    PRINTK (KERN_ERR, "check_area failure: [%x,%x] - mail maintainer!",
 431            startaddress, lastaddress);
 432    return -1;
 433  } else {
 434    return 0;
 435  }
 436}
 437
 438/********** free an skb (as per ATM device driver documentation) **********/
 439
 440static void amb_kfree_skb (struct sk_buff * skb) {
 441  if (ATM_SKB(skb)->vcc->pop) {
 442    ATM_SKB(skb)->vcc->pop (ATM_SKB(skb)->vcc, skb);
 443  } else {
 444    dev_kfree_skb_any (skb);
 445  }
 446}
 447
 448/********** TX completion **********/
 449
 450static void tx_complete (amb_dev * dev, tx_out * tx) {
 451  tx_simple * tx_descr = bus_to_virt (tx->handle);
 452  struct sk_buff * skb = tx_descr->skb;
 453  
 454  PRINTD (DBG_FLOW|DBG_TX, "tx_complete %p %p", dev, tx);
 455  
 456  // VC layer stats
 457  atomic_inc(&ATM_SKB(skb)->vcc->stats->tx);
 458  
 459  // free the descriptor
 460  kfree (tx_descr);
 461  
 462  // free the skb
 463  amb_kfree_skb (skb);
 464  
 465  dev->stats.tx_ok++;
 466  return;
 467}
 468
 469/********** RX completion **********/
 470
 471static void rx_complete (amb_dev * dev, rx_out * rx) {
 472  struct sk_buff * skb = bus_to_virt (rx->handle);
 473  u16 vc = be16_to_cpu (rx->vc);
 474  // unused: u16 lec_id = be16_to_cpu (rx->lec_id);
 475  u16 status = be16_to_cpu (rx->status);
 476  u16 rx_len = be16_to_cpu (rx->length);
 477  
 478  PRINTD (DBG_FLOW|DBG_RX, "rx_complete %p %p (len=%hu)", dev, rx, rx_len);
 479  
 480  // XXX move this in and add to VC stats ???
 481  if (!status) {
 482    struct atm_vcc * atm_vcc = dev->rxer[vc];
 483    dev->stats.rx.ok++;
 484    
 485    if (atm_vcc) {
 486      
 487      if (rx_len <= atm_vcc->qos.rxtp.max_sdu) {
 488        
 489        if (atm_charge (atm_vcc, skb->truesize)) {
 490          
 491          // prepare socket buffer
 492          ATM_SKB(skb)->vcc = atm_vcc;
 493          skb_put (skb, rx_len);
 494          
 495          dump_skb ("<<<", vc, skb);
 496          
 497          // VC layer stats
 498          atomic_inc(&atm_vcc->stats->rx);
 499          __net_timestamp(skb);
 500          // end of our responsibility
 501          atm_vcc->push (atm_vcc, skb);
 502          return;
 503          
 504        } else {
 505          // someone fix this (message), please!
 506          PRINTD (DBG_INFO|DBG_RX, "dropped thanks to atm_charge (vc %hu, truesize %u)", vc, skb->truesize);
 507          // drop stats incremented in atm_charge
 508        }
 509        
 510      } else {
 511        PRINTK (KERN_INFO, "dropped over-size frame");
 512        // should we count this?
 513        atomic_inc(&atm_vcc->stats->rx_drop);
 514      }
 515      
 516    } else {
 517      PRINTD (DBG_WARN|DBG_RX, "got frame but RX closed for channel %hu", vc);
 518      // this is an adapter bug, only in new version of microcode
 519    }
 520    
 521  } else {
 522    dev->stats.rx.error++;
 523    if (status & CRC_ERR)
 524      dev->stats.rx.badcrc++;
 525    if (status & LEN_ERR)
 526      dev->stats.rx.toolong++;
 527    if (status & ABORT_ERR)
 528      dev->stats.rx.aborted++;
 529    if (status & UNUSED_ERR)
 530      dev->stats.rx.unused++;
 531  }
 532  
 533  dev_kfree_skb_any (skb);
 534  return;
 535}
 536
 537/*
 538  
 539  Note on queue handling.
 540  
 541  Here "give" and "take" refer to queue entries and a queue (pair)
 542  rather than frames to or from the host or adapter. Empty frame
 543  buffers are given to the RX queue pair and returned unused or
 544  containing RX frames. TX frames (well, pointers to TX fragment
 545  lists) are given to the TX queue pair, completions are returned.
 546  
 547*/
 548
 549/********** command queue **********/
 550
 551// I really don't like this, but it's the best I can do at the moment
 552
 553// also, the callers are responsible for byte order as the microcode
 554// sometimes does 16-bit accesses (yuk yuk yuk)
 555
 556static int command_do (amb_dev * dev, command * cmd) {
 557  amb_cq * cq = &dev->cq;
 558  volatile amb_cq_ptrs * ptrs = &cq->ptrs;
 559  command * my_slot;
 560  
 561  PRINTD (DBG_FLOW|DBG_CMD, "command_do %p", dev);
 562  
 563  if (test_bit (dead, &dev->flags))
 564    return 0;
 565  
 566  spin_lock (&cq->lock);
 567  
 568  // if not full...
 569  if (cq->pending < cq->maximum) {
 570    // remember my slot for later
 571    my_slot = ptrs->in;
 572    PRINTD (DBG_CMD, "command in slot %p", my_slot);
 573    
 574    dump_command (cmd);
 575    
 576    // copy command in
 577    *ptrs->in = *cmd;
 578    cq->pending++;
 579    ptrs->in = NEXTQ (ptrs->in, ptrs->start, ptrs->limit);
 580    
 581    // mail the command
 582    wr_mem (dev, offsetof(amb_mem, mb.adapter.cmd_address), virt_to_bus (ptrs->in));
 583    
 584    if (cq->pending > cq->high)
 585      cq->high = cq->pending;
 586    spin_unlock (&cq->lock);
 587    
 588    // these comments were in a while-loop before, msleep removes the loop
 589    // go to sleep
 590    // PRINTD (DBG_CMD, "wait: sleeping %lu for command", timeout);
 591    msleep(cq->pending);
 592    
 593    // wait for my slot to be reached (all waiters are here or above, until...)
 594    while (ptrs->out != my_slot) {
 595      PRINTD (DBG_CMD, "wait: command slot (now at %p)", ptrs->out);
 596      set_current_state(TASK_UNINTERRUPTIBLE);
 597      schedule();
 598    }
 599    
 600    // wait on my slot (... one gets to its slot, and... )
 601    while (ptrs->out->request != cpu_to_be32 (SRB_COMPLETE)) {
 602      PRINTD (DBG_CMD, "wait: command slot completion");
 603      set_current_state(TASK_UNINTERRUPTIBLE);
 604      schedule();
 605    }
 606    
 607    PRINTD (DBG_CMD, "command complete");
 608    // update queue (... moves the queue along to the next slot)
 609    spin_lock (&cq->lock);
 610    cq->pending--;
 611    // copy command out
 612    *cmd = *ptrs->out;
 613    ptrs->out = NEXTQ (ptrs->out, ptrs->start, ptrs->limit);
 614    spin_unlock (&cq->lock);
 615    
 616    return 0;
 617  } else {
 618    cq->filled++;
 619    spin_unlock (&cq->lock);
 620    return -EAGAIN;
 621  }
 622  
 623}
 624
 625/********** TX queue pair **********/
 626
 627static int tx_give (amb_dev * dev, tx_in * tx) {
 628  amb_txq * txq = &dev->txq;
 629  unsigned long flags;
 630  
 631  PRINTD (DBG_FLOW|DBG_TX, "tx_give %p", dev);
 632
 633  if (test_bit (dead, &dev->flags))
 634    return 0;
 635  
 636  spin_lock_irqsave (&txq->lock, flags);
 637  
 638  if (txq->pending < txq->maximum) {
 639    PRINTD (DBG_TX, "TX in slot %p", txq->in.ptr);
 640
 641    *txq->in.ptr = *tx;
 642    txq->pending++;
 643    txq->in.ptr = NEXTQ (txq->in.ptr, txq->in.start, txq->in.limit);
 644    // hand over the TX and ring the bell
 645    wr_mem (dev, offsetof(amb_mem, mb.adapter.tx_address), virt_to_bus (txq->in.ptr));
 646    wr_mem (dev, offsetof(amb_mem, doorbell), TX_FRAME);
 647    
 648    if (txq->pending > txq->high)
 649      txq->high = txq->pending;
 650    spin_unlock_irqrestore (&txq->lock, flags);
 651    return 0;
 652  } else {
 653    txq->filled++;
 654    spin_unlock_irqrestore (&txq->lock, flags);
 655    return -EAGAIN;
 656  }
 657}
 658
 659static int tx_take (amb_dev * dev) {
 660  amb_txq * txq = &dev->txq;
 661  unsigned long flags;
 662  
 663  PRINTD (DBG_FLOW|DBG_TX, "tx_take %p", dev);
 664  
 665  spin_lock_irqsave (&txq->lock, flags);
 666  
 667  if (txq->pending && txq->out.ptr->handle) {
 668    // deal with TX completion
 669    tx_complete (dev, txq->out.ptr);
 670    // mark unused again
 671    txq->out.ptr->handle = 0;
 672    // remove item
 673    txq->pending--;
 674    txq->out.ptr = NEXTQ (txq->out.ptr, txq->out.start, txq->out.limit);
 675    
 676    spin_unlock_irqrestore (&txq->lock, flags);
 677    return 0;
 678  } else {
 679    
 680    spin_unlock_irqrestore (&txq->lock, flags);
 681    return -1;
 682  }
 683}
 684
 685/********** RX queue pairs **********/
 686
 687static int rx_give (amb_dev * dev, rx_in * rx, unsigned char pool) {
 688  amb_rxq * rxq = &dev->rxq[pool];
 689  unsigned long flags;
 690  
 691  PRINTD (DBG_FLOW|DBG_RX, "rx_give %p[%hu]", dev, pool);
 692  
 693  spin_lock_irqsave (&rxq->lock, flags);
 694  
 695  if (rxq->pending < rxq->maximum) {
 696    PRINTD (DBG_RX, "RX in slot %p", rxq->in.ptr);
 697
 698    *rxq->in.ptr = *rx;
 699    rxq->pending++;
 700    rxq->in.ptr = NEXTQ (rxq->in.ptr, rxq->in.start, rxq->in.limit);
 701    // hand over the RX buffer
 702    wr_mem (dev, offsetof(amb_mem, mb.adapter.rx_address[pool]), virt_to_bus (rxq->in.ptr));
 703    
 704    spin_unlock_irqrestore (&rxq->lock, flags);
 705    return 0;
 706  } else {
 707    spin_unlock_irqrestore (&rxq->lock, flags);
 708    return -1;
 709  }
 710}
 711
 712static int rx_take (amb_dev * dev, unsigned char pool) {
 713  amb_rxq * rxq = &dev->rxq[pool];
 714  unsigned long flags;
 715  
 716  PRINTD (DBG_FLOW|DBG_RX, "rx_take %p[%hu]", dev, pool);
 717  
 718  spin_lock_irqsave (&rxq->lock, flags);
 719  
 720  if (rxq->pending && (rxq->out.ptr->status || rxq->out.ptr->length)) {
 721    // deal with RX completion
 722    rx_complete (dev, rxq->out.ptr);
 723    // mark unused again
 724    rxq->out.ptr->status = 0;
 725    rxq->out.ptr->length = 0;
 726    // remove item
 727    rxq->pending--;
 728    rxq->out.ptr = NEXTQ (rxq->out.ptr, rxq->out.start, rxq->out.limit);
 729    
 730    if (rxq->pending < rxq->low)
 731      rxq->low = rxq->pending;
 732    spin_unlock_irqrestore (&rxq->lock, flags);
 733    return 0;
 734  } else {
 735    if (!rxq->pending && rxq->buffers_wanted)
 736      rxq->emptied++;
 737    spin_unlock_irqrestore (&rxq->lock, flags);
 738    return -1;
 739  }
 740}
 741
 742/********** RX Pool handling **********/
 743
 744/* pre: buffers_wanted = 0, post: pending = 0 */
 745static void drain_rx_pool (amb_dev * dev, unsigned char pool) {
 746  amb_rxq * rxq = &dev->rxq[pool];
 747  
 748  PRINTD (DBG_FLOW|DBG_POOL, "drain_rx_pool %p %hu", dev, pool);
 749  
 750  if (test_bit (dead, &dev->flags))
 751    return;
 752  
 753  /* we are not quite like the fill pool routines as we cannot just
 754     remove one buffer, we have to remove all of them, but we might as
 755     well pretend... */
 756  if (rxq->pending > rxq->buffers_wanted) {
 757    command cmd;
 758    cmd.request = cpu_to_be32 (SRB_FLUSH_BUFFER_Q);
 759    cmd.args.flush.flags = cpu_to_be32 (pool << SRB_POOL_SHIFT);
 760    while (command_do (dev, &cmd))
 761      schedule();
 762    /* the pool may also be emptied via the interrupt handler */
 763    while (rxq->pending > rxq->buffers_wanted)
 764      if (rx_take (dev, pool))
 765        schedule();
 766  }
 767  
 768  return;
 769}
 770
 771static void drain_rx_pools (amb_dev * dev) {
 772  unsigned char pool;
 773  
 774  PRINTD (DBG_FLOW|DBG_POOL, "drain_rx_pools %p", dev);
 775  
 776  for (pool = 0; pool < NUM_RX_POOLS; ++pool)
 777    drain_rx_pool (dev, pool);
 778}
 779
 780static void fill_rx_pool (amb_dev * dev, unsigned char pool,
 781                                 gfp_t priority)
 782{
 783  rx_in rx;
 784  amb_rxq * rxq;
 785  
 786  PRINTD (DBG_FLOW|DBG_POOL, "fill_rx_pool %p %hu %x", dev, pool, priority);
 787  
 788  if (test_bit (dead, &dev->flags))
 789    return;
 790  
 791  rxq = &dev->rxq[pool];
 792  while (rxq->pending < rxq->maximum && rxq->pending < rxq->buffers_wanted) {
 793    
 794    struct sk_buff * skb = alloc_skb (rxq->buffer_size, priority);
 795    if (!skb) {
 796      PRINTD (DBG_SKB|DBG_POOL, "failed to allocate skb for RX pool %hu", pool);
 797      return;
 798    }
 799    if (check_area (skb->data, skb->truesize)) {
 800      dev_kfree_skb_any (skb);
 801      return;
 802    }
 803    // cast needed as there is no %? for pointer differences
 804    PRINTD (DBG_SKB, "allocated skb at %p, head %p, area %li",
 805            skb, skb->head, (long) skb_end_offset(skb));
 806    rx.handle = virt_to_bus (skb);
 807    rx.host_address = cpu_to_be32 (virt_to_bus (skb->data));
 808    if (rx_give (dev, &rx, pool))
 809      dev_kfree_skb_any (skb);
 810    
 811  }
 812  
 813  return;
 814}
 815
 816// top up all RX pools
 817static void fill_rx_pools (amb_dev * dev) {
 818  unsigned char pool;
 819  
 820  PRINTD (DBG_FLOW|DBG_POOL, "fill_rx_pools %p", dev);
 821  
 822  for (pool = 0; pool < NUM_RX_POOLS; ++pool)
 823    fill_rx_pool (dev, pool, GFP_ATOMIC);
 824  
 825  return;
 826}
 827
 828/********** enable host interrupts **********/
 829
 830static void interrupts_on (amb_dev * dev) {
 831  wr_plain (dev, offsetof(amb_mem, interrupt_control),
 832            rd_plain (dev, offsetof(amb_mem, interrupt_control))
 833            | AMB_INTERRUPT_BITS);
 834}
 835
 836/********** disable host interrupts **********/
 837
 838static void interrupts_off (amb_dev * dev) {
 839  wr_plain (dev, offsetof(amb_mem, interrupt_control),
 840            rd_plain (dev, offsetof(amb_mem, interrupt_control))
 841            &~ AMB_INTERRUPT_BITS);
 842}
 843
 844/********** interrupt handling **********/
 845
 846static irqreturn_t interrupt_handler(int irq, void *dev_id) {
 847  amb_dev * dev = dev_id;
 848  
 849  PRINTD (DBG_IRQ|DBG_FLOW, "interrupt_handler: %p", dev_id);
 850  
 851  {
 852    u32 interrupt = rd_plain (dev, offsetof(amb_mem, interrupt));
 853  
 854    // for us or someone else sharing the same interrupt
 855    if (!interrupt) {
 856      PRINTD (DBG_IRQ, "irq not for me: %d", irq);
 857      return IRQ_NONE;
 858    }
 859    
 860    // definitely for us
 861    PRINTD (DBG_IRQ, "FYI: interrupt was %08x", interrupt);
 862    wr_plain (dev, offsetof(amb_mem, interrupt), -1);
 863  }
 864  
 865  {
 866    unsigned int irq_work = 0;
 867    unsigned char pool;
 868    for (pool = 0; pool < NUM_RX_POOLS; ++pool)
 869      while (!rx_take (dev, pool))
 870        ++irq_work;
 871    while (!tx_take (dev))
 872      ++irq_work;
 873  
 874    if (irq_work) {
 875      fill_rx_pools (dev);
 876
 877      PRINTD (DBG_IRQ, "work done: %u", irq_work);
 878    } else {
 879      PRINTD (DBG_IRQ|DBG_WARN, "no work done");
 880    }
 881  }
 882  
 883  PRINTD (DBG_IRQ|DBG_FLOW, "interrupt_handler done: %p", dev_id);
 884  return IRQ_HANDLED;
 885}
 886
 887/********** make rate (not quite as much fun as Horizon) **********/
 888
 889static int make_rate (unsigned int rate, rounding r,
 890                      u16 * bits, unsigned int * actual) {
 891  unsigned char exp = -1; // hush gcc
 892  unsigned int man = -1;  // hush gcc
 893  
 894  PRINTD (DBG_FLOW|DBG_QOS, "make_rate %u", rate);
 895  
 896  // rates in cells per second, ITU format (nasty 16-bit floating-point)
 897  // given 5-bit e and 9-bit m:
 898  // rate = EITHER (1+m/2^9)*2^e    OR 0
 899  // bits = EITHER 1<<14 | e<<9 | m OR 0
 900  // (bit 15 is "reserved", bit 14 "non-zero")
 901  // smallest rate is 0 (special representation)
 902  // largest rate is (1+511/512)*2^31 = 4290772992 (< 2^32-1)
 903  // smallest non-zero rate is (1+0/512)*2^0 = 1 (> 0)
 904  // simple algorithm:
 905  // find position of top bit, this gives e
 906  // remove top bit and shift (rounding if feeling clever) by 9-e
 907  
 908  // ucode bug: please don't set bit 14! so 0 rate not representable
 909  
 910  if (rate > 0xffc00000U) {
 911    // larger than largest representable rate
 912    
 913    if (r == round_up) {
 914        return -EINVAL;
 915    } else {
 916      exp = 31;
 917      man = 511;
 918    }
 919    
 920  } else if (rate) {
 921    // representable rate
 922    
 923    exp = 31;
 924    man = rate;
 925    
 926    // invariant: rate = man*2^(exp-31)
 927    while (!(man & (1<<31))) {
 928      exp = exp - 1;
 929      man = man<<1;
 930    }
 931    
 932    // man has top bit set
 933    // rate = (2^31+(man-2^31))*2^(exp-31)
 934    // rate = (1+(man-2^31)/2^31)*2^exp
 935    man = man<<1;
 936    man &= 0xffffffffU; // a nop on 32-bit systems
 937    // rate = (1+man/2^32)*2^exp
 938    
 939    // exp is in the range 0 to 31, man is in the range 0 to 2^32-1
 940    // time to lose significance... we want m in the range 0 to 2^9-1
 941    // rounding presents a minor problem... we first decide which way
 942    // we are rounding (based on given rounding direction and possibly
 943    // the bits of the mantissa that are to be discarded).
 944    
 945    switch (r) {
 946      case round_down: {
 947        // just truncate
 948        man = man>>(32-9);
 949        break;
 950      }
 951      case round_up: {
 952        // check all bits that we are discarding
 953        if (man & (~0U>>9)) {
 954          man = (man>>(32-9)) + 1;
 955          if (man == (1<<9)) {
 956            // no need to check for round up outside of range
 957            man = 0;
 958            exp += 1;
 959          }
 960        } else {
 961          man = (man>>(32-9));
 962        }
 963        break;
 964      }
 965      case round_nearest: {
 966        // check msb that we are discarding
 967        if (man & (1<<(32-9-1))) {
 968          man = (man>>(32-9)) + 1;
 969          if (man == (1<<9)) {
 970            // no need to check for round up outside of range
 971            man = 0;
 972            exp += 1;
 973          }
 974        } else {
 975          man = (man>>(32-9));
 976        }
 977        break;
 978      }
 979    }
 980    
 981  } else {
 982    // zero rate - not representable
 983    
 984    if (r == round_down) {
 985      return -EINVAL;
 986    } else {
 987      exp = 0;
 988      man = 0;
 989    }
 990    
 991  }
 992  
 993  PRINTD (DBG_QOS, "rate: man=%u, exp=%hu", man, exp);
 994  
 995  if (bits)
 996    *bits = /* (1<<14) | */ (exp<<9) | man;
 997  
 998  if (actual)
 999    *actual = (exp >= 9)
1000      ? (1 << exp) + (man << (exp-9))
1001      : (1 << exp) + ((man + (1<<(9-exp-1))) >> (9-exp));
1002  
1003  return 0;
1004}
1005
1006/********** Linux ATM Operations **********/
1007
1008// some are not yet implemented while others do not make sense for
1009// this device
1010
1011/********** Open a VC **********/
1012
1013static int amb_open (struct atm_vcc * atm_vcc)
1014{
1015  int error;
1016  
1017  struct atm_qos * qos;
1018  struct atm_trafprm * txtp;
1019  struct atm_trafprm * rxtp;
1020  u16 tx_rate_bits = -1; // hush gcc
1021  u16 tx_vc_bits = -1; // hush gcc
1022  u16 tx_frame_bits = -1; // hush gcc
1023  
1024  amb_dev * dev = AMB_DEV(atm_vcc->dev);
1025  amb_vcc * vcc;
1026  unsigned char pool = -1; // hush gcc
1027  short vpi = atm_vcc->vpi;
1028  int vci = atm_vcc->vci;
1029  
1030  PRINTD (DBG_FLOW|DBG_VCC, "amb_open %x %x", vpi, vci);
1031  
1032#ifdef ATM_VPI_UNSPEC
1033  // UNSPEC is deprecated, remove this code eventually
1034  if (vpi == ATM_VPI_UNSPEC || vci == ATM_VCI_UNSPEC) {
1035    PRINTK (KERN_WARNING, "rejecting open with unspecified VPI/VCI (deprecated)");
1036    return -EINVAL;
1037  }
1038#endif
1039  
1040  if (!(0 <= vpi && vpi < (1<<NUM_VPI_BITS) &&
1041        0 <= vci && vci < (1<<NUM_VCI_BITS))) {
1042    PRINTD (DBG_WARN|DBG_VCC, "VPI/VCI out of range: %hd/%d", vpi, vci);
1043    return -EINVAL;
1044  }
1045  
1046  qos = &atm_vcc->qos;
1047  
1048  if (qos->aal != ATM_AAL5) {
1049    PRINTD (DBG_QOS, "AAL not supported");
1050    return -EINVAL;
1051  }
1052  
1053  // traffic parameters
1054  
1055  PRINTD (DBG_QOS, "TX:");
1056  txtp = &qos->txtp;
1057  if (txtp->traffic_class != ATM_NONE) {
1058    switch (txtp->traffic_class) {
1059      case ATM_UBR: {
1060        // we take "the PCR" as a rate-cap
1061        int pcr = atm_pcr_goal (txtp);
1062        if (!pcr) {
1063          // no rate cap
1064          tx_rate_bits = 0;
1065          tx_vc_bits = TX_UBR;
1066          tx_frame_bits = TX_FRAME_NOTCAP;
1067        } else {
1068          rounding r;
1069          if (pcr < 0) {
1070            r = round_down;
1071            pcr = -pcr;
1072          } else {
1073            r = round_up;
1074          }
1075          error = make_rate (pcr, r, &tx_rate_bits, NULL);
1076          if (error)
1077            return error;
1078          tx_vc_bits = TX_UBR_CAPPED;
1079          tx_frame_bits = TX_FRAME_CAPPED;
1080        }
1081        break;
1082      }
1083#if 0
1084      case ATM_ABR: {
1085        pcr = atm_pcr_goal (txtp);
1086        PRINTD (DBG_QOS, "pcr goal = %d", pcr);
1087        break;
1088      }
1089#endif
1090      default: {
1091        // PRINTD (DBG_QOS, "request for non-UBR/ABR denied");
1092        PRINTD (DBG_QOS, "request for non-UBR denied");
1093        return -EINVAL;
1094      }
1095    }
1096    PRINTD (DBG_QOS, "tx_rate_bits=%hx, tx_vc_bits=%hx",
1097            tx_rate_bits, tx_vc_bits);
1098  }
1099  
1100  PRINTD (DBG_QOS, "RX:");
1101  rxtp = &qos->rxtp;
1102  if (rxtp->traffic_class == ATM_NONE) {
1103    // do nothing
1104  } else {
1105    // choose an RX pool (arranged in increasing size)
1106    for (pool = 0; pool < NUM_RX_POOLS; ++pool)
1107      if ((unsigned int) rxtp->max_sdu <= dev->rxq[pool].buffer_size) {
1108        PRINTD (DBG_VCC|DBG_QOS|DBG_POOL, "chose pool %hu (max_sdu %u <= %u)",
1109                pool, rxtp->max_sdu, dev->rxq[pool].buffer_size);
1110        break;
1111      }
1112    if (pool == NUM_RX_POOLS) {
1113      PRINTD (DBG_WARN|DBG_VCC|DBG_QOS|DBG_POOL,
1114              "no pool suitable for VC (RX max_sdu %d is too large)",
1115              rxtp->max_sdu);
1116      return -EINVAL;
1117    }
1118    
1119    switch (rxtp->traffic_class) {
1120      case ATM_UBR: {
1121        break;
1122      }
1123#if 0
1124      case ATM_ABR: {
1125        pcr = atm_pcr_goal (rxtp);
1126        PRINTD (DBG_QOS, "pcr goal = %d", pcr);
1127        break;
1128      }
1129#endif
1130      default: {
1131        // PRINTD (DBG_QOS, "request for non-UBR/ABR denied");
1132        PRINTD (DBG_QOS, "request for non-UBR denied");
1133        return -EINVAL;
1134      }
1135    }
1136  }
1137  
1138  // get space for our vcc stuff
1139  vcc = kmalloc (sizeof(amb_vcc), GFP_KERNEL);
1140  if (!vcc) {
1141    PRINTK (KERN_ERR, "out of memory!");
1142    return -ENOMEM;
1143  }
1144  atm_vcc->dev_data = (void *) vcc;
1145  
1146  // no failures beyond this point
1147  
1148  // we are not really "immediately before allocating the connection
1149  // identifier in hardware", but it will just have to do!
1150  set_bit(ATM_VF_ADDR,&atm_vcc->flags);
1151  
1152  if (txtp->traffic_class != ATM_NONE) {
1153    command cmd;
1154    
1155    vcc->tx_frame_bits = tx_frame_bits;
1156    
1157    mutex_lock(&dev->vcc_sf);
1158    if (dev->rxer[vci]) {
1159      // RXer on the channel already, just modify rate...
1160      cmd.request = cpu_to_be32 (SRB_MODIFY_VC_RATE);
1161      cmd.args.modify_rate.vc = cpu_to_be32 (vci);  // vpi 0
1162      cmd.args.modify_rate.rate = cpu_to_be32 (tx_rate_bits << SRB_RATE_SHIFT);
1163      while (command_do (dev, &cmd))
1164        schedule();
1165      // ... and TX flags, preserving the RX pool
1166      cmd.request = cpu_to_be32 (SRB_MODIFY_VC_FLAGS);
1167      cmd.args.modify_flags.vc = cpu_to_be32 (vci);  // vpi 0
1168      cmd.args.modify_flags.flags = cpu_to_be32
1169        ( (AMB_VCC(dev->rxer[vci])->rx_info.pool << SRB_POOL_SHIFT)
1170          | (tx_vc_bits << SRB_FLAGS_SHIFT) );
1171      while (command_do (dev, &cmd))
1172        schedule();
1173    } else {
1174      // no RXer on the channel, just open (with pool zero)
1175      cmd.request = cpu_to_be32 (SRB_OPEN_VC);
1176      cmd.args.open.vc = cpu_to_be32 (vci);  // vpi 0
1177      cmd.args.open.flags = cpu_to_be32 (tx_vc_bits << SRB_FLAGS_SHIFT);
1178      cmd.args.open.rate = cpu_to_be32 (tx_rate_bits << SRB_RATE_SHIFT);
1179      while (command_do (dev, &cmd))
1180        schedule();
1181    }
1182    dev->txer[vci].tx_present = 1;
1183    mutex_unlock(&dev->vcc_sf);
1184  }
1185  
1186  if (rxtp->traffic_class != ATM_NONE) {
1187    command cmd;
1188    
1189    vcc->rx_info.pool = pool;
1190    
1191    mutex_lock(&dev->vcc_sf);
1192    /* grow RX buffer pool */
1193    if (!dev->rxq[pool].buffers_wanted)
1194      dev->rxq[pool].buffers_wanted = rx_lats;
1195    dev->rxq[pool].buffers_wanted += 1;
1196    fill_rx_pool (dev, pool, GFP_KERNEL);
1197    
1198    if (dev->txer[vci].tx_present) {
1199      // TXer on the channel already
1200      // switch (from pool zero) to this pool, preserving the TX bits
1201      cmd.request = cpu_to_be32 (SRB_MODIFY_VC_FLAGS);
1202      cmd.args.modify_flags.vc = cpu_to_be32 (vci);  // vpi 0
1203      cmd.args.modify_flags.flags = cpu_to_be32
1204        ( (pool << SRB_POOL_SHIFT)
1205          | (dev->txer[vci].tx_vc_bits << SRB_FLAGS_SHIFT) );
1206    } else {
1207      // no TXer on the channel, open the VC (with no rate info)
1208      cmd.request = cpu_to_be32 (SRB_OPEN_VC);
1209      cmd.args.open.vc = cpu_to_be32 (vci);  // vpi 0
1210      cmd.args.open.flags = cpu_to_be32 (pool << SRB_POOL_SHIFT);
1211      cmd.args.open.rate = cpu_to_be32 (0);
1212    }
1213    while (command_do (dev, &cmd))
1214      schedule();
1215    // this link allows RX frames through
1216    dev->rxer[vci] = atm_vcc;
1217    mutex_unlock(&dev->vcc_sf);
1218  }
1219  
1220  // indicate readiness
1221  set_bit(ATM_VF_READY,&atm_vcc->flags);
1222  
1223  return 0;
1224}
1225
1226/********** Close a VC **********/
1227
1228static void amb_close (struct atm_vcc * atm_vcc) {
1229  amb_dev * dev = AMB_DEV (atm_vcc->dev);
1230  amb_vcc * vcc = AMB_VCC (atm_vcc);
1231  u16 vci = atm_vcc->vci;
1232  
1233  PRINTD (DBG_VCC|DBG_FLOW, "amb_close");
1234  
1235  // indicate unreadiness
1236  clear_bit(ATM_VF_READY,&atm_vcc->flags);
1237  
1238  // disable TXing
1239  if (atm_vcc->qos.txtp.traffic_class != ATM_NONE) {
1240    command cmd;
1241    
1242    mutex_lock(&dev->vcc_sf);
1243    if (dev->rxer[vci]) {
1244      // RXer still on the channel, just modify rate... XXX not really needed
1245      cmd.request = cpu_to_be32 (SRB_MODIFY_VC_RATE);
1246      cmd.args.modify_rate.vc = cpu_to_be32 (vci);  // vpi 0
1247      cmd.args.modify_rate.rate = cpu_to_be32 (0);
1248      // ... and clear TX rate flags (XXX to stop RM cell output?), preserving RX pool
1249    } else {
1250      // no RXer on the channel, close channel
1251      cmd.request = cpu_to_be32 (SRB_CLOSE_VC);
1252      cmd.args.close.vc = cpu_to_be32 (vci); // vpi 0
1253    }
1254    dev->txer[vci].tx_present = 0;
1255    while (command_do (dev, &cmd))
1256      schedule();
1257    mutex_unlock(&dev->vcc_sf);
1258  }
1259  
1260  // disable RXing
1261  if (atm_vcc->qos.rxtp.traffic_class != ATM_NONE) {
1262    command cmd;
1263    
1264    // this is (the?) one reason why we need the amb_vcc struct
1265    unsigned char pool = vcc->rx_info.pool;
1266    
1267    mutex_lock(&dev->vcc_sf);
1268    if (dev->txer[vci].tx_present) {
1269      // TXer still on the channel, just go to pool zero XXX not really needed
1270      cmd.request = cpu_to_be32 (SRB_MODIFY_VC_FLAGS);
1271      cmd.args.modify_flags.vc = cpu_to_be32 (vci);  // vpi 0
1272      cmd.args.modify_flags.flags = cpu_to_be32
1273        (dev->txer[vci].tx_vc_bits << SRB_FLAGS_SHIFT);
1274    } else {
1275      // no TXer on the channel, close the VC
1276      cmd.request = cpu_to_be32 (SRB_CLOSE_VC);
1277      cmd.args.close.vc = cpu_to_be32 (vci); // vpi 0
1278    }
1279    // forget the rxer - no more skbs will be pushed
1280    if (atm_vcc != dev->rxer[vci])
1281      PRINTK (KERN_ERR, "%s vcc=%p rxer[vci]=%p",
1282              "arghhh! we're going to die!",
1283              vcc, dev->rxer[vci]);
1284    dev->rxer[vci] = NULL;
1285    while (command_do (dev, &cmd))
1286      schedule();
1287    
1288    /* shrink RX buffer pool */
1289    dev->rxq[pool].buffers_wanted -= 1;
1290    if (dev->rxq[pool].buffers_wanted == rx_lats) {
1291      dev->rxq[pool].buffers_wanted = 0;
1292      drain_rx_pool (dev, pool);
1293    }
1294    mutex_unlock(&dev->vcc_sf);
1295  }
1296  
1297  // free our structure
1298  kfree (vcc);
1299  
1300  // say the VPI/VCI is free again
1301  clear_bit(ATM_VF_ADDR,&atm_vcc->flags);
1302
1303  return;
1304}
1305
1306/********** Send **********/
1307
1308static int amb_send (struct atm_vcc * atm_vcc, struct sk_buff * skb) {
1309  amb_dev * dev = AMB_DEV(atm_vcc->dev);
1310  amb_vcc * vcc = AMB_VCC(atm_vcc);
1311  u16 vc = atm_vcc->vci;
1312  unsigned int tx_len = skb->len;
1313  unsigned char * tx_data = skb->data;
1314  tx_simple * tx_descr;
1315  tx_in tx;
1316  
1317  if (test_bit (dead, &dev->flags))
1318    return -EIO;
1319  
1320  PRINTD (DBG_FLOW|DBG_TX, "amb_send vc %x data %p len %u",
1321          vc, tx_data, tx_len);
1322  
1323  dump_skb (">>>", vc, skb);
1324  
1325  if (!dev->txer[vc].tx_present) {
1326    PRINTK (KERN_ERR, "attempt to send on RX-only VC %x", vc);
1327    return -EBADFD;
1328  }
1329  
1330  // this is a driver private field so we have to set it ourselves,
1331  // despite the fact that we are _required_ to use it to check for a
1332  // pop function
1333  ATM_SKB(skb)->vcc = atm_vcc;
1334  
1335  if (skb->len > (size_t) atm_vcc->qos.txtp.max_sdu) {
1336    PRINTK (KERN_ERR, "sk_buff length greater than agreed max_sdu, dropping...");
1337    return -EIO;
1338  }
1339  
1340  if (check_area (skb->data, skb->len)) {
1341    atomic_inc(&atm_vcc->stats->tx_err);
1342    return -ENOMEM; // ?
1343  }
1344  
1345  // allocate memory for fragments
1346  tx_descr = kmalloc (sizeof(tx_simple), GFP_KERNEL);
1347  if (!tx_descr) {
1348    PRINTK (KERN_ERR, "could not allocate TX descriptor");
1349    return -ENOMEM;
1350  }
1351  if (check_area (tx_descr, sizeof(tx_simple))) {
1352    kfree (tx_descr);
1353    return -ENOMEM;
1354  }
1355  PRINTD (DBG_TX, "fragment list allocated at %p", tx_descr);
1356  
1357  tx_descr->skb = skb;
1358  
1359  tx_descr->tx_frag.bytes = cpu_to_be32 (tx_len);
1360  tx_descr->tx_frag.address = cpu_to_be32 (virt_to_bus (tx_data));
1361  
1362  tx_descr->tx_frag_end.handle = virt_to_bus (tx_descr);
1363  tx_descr->tx_frag_end.vc = 0;
1364  tx_descr->tx_frag_end.next_descriptor_length = 0;
1365  tx_descr->tx_frag_end.next_descriptor = 0;
1366#ifdef AMB_NEW_MICROCODE
1367  tx_descr->tx_frag_end.cpcs_uu = 0;
1368  tx_descr->tx_frag_end.cpi = 0;
1369  tx_descr->tx_frag_end.pad = 0;
1370#endif
1371  
1372  tx.vc = cpu_to_be16 (vcc->tx_frame_bits | vc);
1373  tx.tx_descr_length = cpu_to_be16 (sizeof(tx_frag)+sizeof(tx_frag_end));
1374  tx.tx_descr_addr = cpu_to_be32 (virt_to_bus (&tx_descr->tx_frag));
1375  
1376  while (tx_give (dev, &tx))
1377    schedule();
1378  return 0;
1379}
1380
1381/********** Change QoS on a VC **********/
1382
1383// int amb_change_qos (struct atm_vcc * atm_vcc, struct atm_qos * qos, int flags);
1384
1385/********** Free RX Socket Buffer **********/
1386
#if 0
// Disabled: recycle an RX skb straight back into its VC's RX pool
// instead of freeing it; falls back to freeing if the pool queue
// refuses the buffer.
static void amb_free_rx_skb (struct atm_vcc * atm_vcc, struct sk_buff * skb) {
  amb_dev * const card = AMB_DEV (atm_vcc->dev);
  amb_vcc * const priv = AMB_VCC (atm_vcc);
  const unsigned char pool = priv->rx_info.pool;
  rx_in entry;
  
  // This may be unsafe for various reasons that I cannot really guess
  // at. However, I note that the ATM layer calls kfree_skb rather
  // than dev_kfree_skb at this point so we are least covered as far
  // as buffer locking goes. There may be bugs if pcap clones RX skbs.

  PRINTD (DBG_FLOW|DBG_SKB, "amb_rx_free skb %p (atm_vcc %p, vcc %p)",
          skb, atm_vcc, priv);
  
  // the queue entry is built from the skb as it stands on entry
  entry.handle = virt_to_bus (skb);
  entry.host_address = cpu_to_be32 (virt_to_bus (skb->data));
  
  // rewind the skb so that it looks empty again
  skb->data = skb->head;
  skb->tail = skb->head;
  skb->len = 0;
  
  if (rx_give (card, &entry, pool)) {
    // queue refused it: just do what the ATM layer would have done
    dev_kfree_skb_any (skb);
    return;
  }
  
  // success
  PRINTD (DBG_SKB|DBG_POOL, "recycled skb for pool %hu", pool);
}
#endif
1421
1422/********** Proc File Output **********/
1423
1424static int amb_proc_read (struct atm_dev * atm_dev, loff_t * pos, char * page) {
1425  amb_dev * dev = AMB_DEV (atm_dev);
1426  int left = *pos;
1427  unsigned char pool;
1428  
1429  PRINTD (DBG_FLOW, "amb_proc_read");
1430  
1431  /* more diagnostics here? */
1432  
1433  if (!left--) {
1434    amb_stats * s = &dev->stats;
1435    return sprintf (page,
1436                    "frames: TX OK %lu, RX OK %lu, RX bad %lu "
1437                    "(CRC %lu, long %lu, aborted %lu, unused %lu).\n",
1438                    s->tx_ok, s->rx.ok, s->rx.error,
1439                    s->rx.badcrc, s->rx.toolong,
1440                    s->rx.aborted, s->rx.unused);
1441  }
1442  
1443  if (!left--) {
1444    amb_cq * c = &dev->cq;
1445    return sprintf (page, "cmd queue [cur/hi/max]: %u/%u/%u. ",
1446                    c->pending, c->high, c->maximum);
1447  }
1448  
1449  if (!left--) {
1450    amb_txq * t = &dev->txq;
1451    return sprintf (page, "TX queue [cur/max high full]: %u/%u %u %u.\n",
1452                    t->pending, t->maximum, t->high, t->filled);
1453  }
1454  
1455  if (!left--) {
1456    unsigned int count = sprintf (page, "RX queues [cur/max/req low empty]:");
1457    for (pool = 0; pool < NUM_RX_POOLS; ++pool) {
1458      amb_rxq * r = &dev->rxq[pool];
1459      count += sprintf (page+count, " %u/%u/%u %u %u",
1460                        r->pending, r->maximum, r->buffers_wanted, r->low, r->emptied);
1461    }
1462    count += sprintf (page+count, ".\n");
1463    return count;
1464  }
1465  
1466  if (!left--) {
1467    unsigned int count = sprintf (page, "RX buffer sizes:");
1468    for (pool = 0; pool < NUM_RX_POOLS; ++pool) {
1469      amb_rxq * r = &dev->rxq[pool];
1470      count += sprintf (page+count, " %u", r->buffer_size);
1471    }
1472    count += sprintf (page+count, ".\n");
1473    return count;
1474  }
1475  
1476#if 0
1477  if (!left--) {
1478    // suni block etc?
1479  }
1480#endif
1481  
1482  return 0;
1483}
1484
1485/********** Operation Structure **********/
1486
1487static const struct atmdev_ops amb_ops = {
1488  .open         = amb_open,
1489  .close        = amb_close,
1490  .send         = amb_send,
1491  .proc_read    = amb_proc_read,
1492  .owner        = THIS_MODULE,
1493};
1494
1495/********** housekeeping **********/
1496static void do_housekeeping (unsigned long arg) {
1497  amb_dev * dev = (amb_dev *) arg;
1498  
1499  // could collect device-specific (not driver/atm-linux) stats here
1500      
1501  // last resort refill once every ten seconds
1502  fill_rx_pools (dev);
1503  mod_timer(&dev->housekeeping, jiffies + 10*HZ);
1504  
1505  return;
1506}
1507
1508/********** creation of communication queues **********/
1509
1510static int __devinit create_queues (amb_dev * dev, unsigned int cmds,
1511                                 unsigned int txs, unsigned int * rxs,
1512                                 unsigned int * rx_buffer_sizes) {
1513  unsigned char pool;
1514  size_t total = 0;
1515  void * memory;
1516  void * limit;
1517  
1518  PRINTD (DBG_FLOW, "create_queues %p", dev);
1519  
1520  total += cmds * sizeof(command);
1521  
1522  total += txs * (sizeof(tx_in) + sizeof(tx_out));
1523  
1524  for (pool = 0; pool < NUM_RX_POOLS; ++pool)
1525    total += rxs[pool] * (sizeof(rx_in) + sizeof(rx_out));
1526  
1527  memory = kmalloc (total, GFP_KERNEL);
1528  if (!memory) {
1529    PRINTK (KERN_ERR, "could not allocate queues");
1530    return -ENOMEM;
1531  }
1532  if (check_area (memory, total)) {
1533    PRINTK (KERN_ERR, "queues allocated in nasty area");
1534    kfree (memory);
1535    return -ENOMEM;
1536  }
1537  
1538  limit = memory + total;
1539  PRINTD (DBG_INIT, "queues from %p to %p", memory, limit);
1540  
1541  PRINTD (DBG_CMD, "command queue at %p", memory);
1542  
1543  {
1544    command * cmd = memory;
1545    amb_cq * cq = &dev->cq;
1546    
1547    cq->pending = 0;
1548    cq->high = 0;
1549    cq->maximum = cmds - 1;
1550    
1551    cq->ptrs.start = cmd;
1552    cq->ptrs.in = cmd;
1553    cq->ptrs.out = cmd;
1554    cq->ptrs.limit = cmd + cmds;
1555    
1556    memory = cq->ptrs.limit;
1557  }
1558  
1559  PRINTD (DBG_TX, "TX queue pair at %p", memory);
1560  
1561  {
1562    tx_in * in = memory;
1563    tx_out * out;
1564    amb_txq * txq = &dev->txq;
1565    
1566    txq->pending = 0;
1567    txq->high = 0;
1568    txq->filled = 0;
1569    txq->maximum = txs - 1;
1570    
1571    txq->in.start = in;
1572    txq->in.ptr = in;
1573    txq->in.limit = in + txs;
1574    
1575    memory = txq->in.limit;
1576    out = memory;
1577    
1578    txq->out.start = out;
1579    txq->out.ptr = out;
1580    txq->out.limit = out + txs;
1581    
1582    memory = txq->out.limit;
1583  }
1584  
1585  PRINTD (DBG_RX, "RX queue pairs at %p", memory);
1586  
1587  for (pool = 0; pool < NUM_RX_POOLS; ++pool) {
1588    rx_in * in = memory;
1589    rx_out * out;
1590    amb_rxq * rxq = &dev->rxq[pool];
1591    
1592    rxq->buffer_size = rx_buffer_sizes[pool];
1593    rxq->buffers_wanted = 0;
1594    
1595    rxq->pending = 0;
1596    rxq->low = rxs[pool] - 1;
1597    rxq->emptied = 0;
1598    rxq->maximum = rxs[pool] - 1;
1599    
1600    rxq->in.start = in;
1601    rxq->in.ptr = in;
1602    rxq->in.limit = in + rxs[pool];
1603    
1604    memory = rxq->in.limit;
1605    out = memory;
1606    
1607    rxq->out.start = out;
1608    rxq->out.ptr = out;
1609    rxq->out.limit = out + rxs[pool];
1610    
1611    memory = rxq->out.limit;
1612  }
1613  
1614  if (memory == limit) {
1615    return 0;
1616  } else {
1617    PRINTK (KERN_ERR, "bad queue alloc %p != %p (tell maintainer)", memory, limit);
1618    kfree (limit - total);
1619    return -ENOMEM;
1620  }
1621  
1622}
1623
1624/********** destruction of communication queues **********/
1625
1626static void destroy_queues (amb_dev * dev) {
1627  // all queues assumed empty
1628  void * memory = dev->cq.ptrs.start;
1629  // includes txq.in, txq.out, rxq[].in and rxq[].out
1630  
1631  PRINTD (DBG_FLOW, "destroy_queues %p", dev);
1632  
1633  PRINTD (DBG_INIT, "freeing queues at %p", memory);
1634  kfree (memory);
1635  
1636  return;
1637}
1638
1639/********** basic loader commands and error handling **********/
1640// centisecond timeouts - guessing away here
1641static unsigned int command_timeouts [] = {
1642        [host_memory_test]     = 15,
1643        [read_adapter_memory]  = 2,
1644        [write_adapter_memory] = 2,
1645        [adapter_start]        = 50,
1646        [get_version_number]   = 10,
1647        [interrupt_host]       = 1,
1648        [flash_erase_sector]   = 1,
1649        [adap_download_block]  = 1,
1650        [adap_erase_flash]     = 1,
1651        [adap_run_in_iram]     = 1,
1652        [adap_end_download]    = 1
1653};
1654
1655
1656static unsigned int command_successes [] = {
1657        [host_memory_test]     = COMMAND_PASSED_TEST,
1658        [read_adapter_memory]  = COMMAND_READ_DATA_OK,
1659        [write_adapter_memory] = COMMAND_WRITE_DATA_OK,
1660        [adapter_start]        = COMMAND_COMPLETE,
1661        [get_version_number]   = COMMAND_COMPLETE,
1662        [interrupt_host]       = COMMAND_COMPLETE,
1663        [flash_erase_sector]   = COMMAND_COMPLETE,
1664        [adap_download_block]  = COMMAND_COMPLETE,
1665        [adap_erase_flash]     = COMMAND_COMPLETE,
1666        [adap_run_in_iram]     = COMMAND_COMPLETE,
1667        [adap_end_download]    = COMMAND_COMPLETE
1668};
1669  
1670static  int decode_loader_result (loader_command cmd, u32 result)
1671{
1672        int res;
1673        const char *msg;
1674
1675        if (result == command_successes[cmd])
1676                return 0;
1677
1678        switch (result) {
1679                case BAD_COMMAND:
1680                        res = -EINVAL;
1681                        msg = "bad command";
1682                        break;
1683                case COMMAND_IN_PROGRESS:
1684                        res = -ETIMEDOUT;
1685                        msg = "command in progress";
1686                        break;
1687                case COMMAND_PASSED_TEST:
1688                        res = 0;
1689                        msg = "command passed test";
1690                        break;
1691                case COMMAND_FAILED_TEST:
1692                        res = -EIO;
1693                        msg = "command failed test";
1694                        break;
1695                case COMMAND_READ_DATA_OK:
1696                        res = 0;
1697                        msg = "command read data ok";
1698                        break;
1699                case COMMAND_READ_BAD_ADDRESS:
1700                        res = -EINVAL;
1701                        msg = "command read bad address";
1702                        break;
1703                case COMMAND_WRITE_DATA_OK:
1704                        res = 0;
1705                        msg = "command write data ok";
1706                        break;
1707                case COMMAND_WRITE_BAD_ADDRESS:
1708                        res = -EINVAL;
1709                        msg = "command write bad address";
1710                        break;
1711                case COMMAND_WRITE_FLASH_FAILURE:
1712                        res = -EIO;
1713                        msg = "command write flash failure";
1714                        break;
1715                case COMMAND_COMPLETE:
1716                        res = 0;
1717                        msg = "command complete";
1718                        break;
1719                case COMMAND_FLASH_ERASE_FAILURE:
1720                        res = -EIO;
1721                        msg = "command flash erase failure";
1722                        break;
1723                case COMMAND_WRITE_BAD_DATA:
1724                        res = -EINVAL;
1725                        msg = "command write bad data";
1726                        break;
1727                default:
1728                        res = -EINVAL;
1729                        msg = "unknown error";
1730                        PRINTD (DBG_LOAD|DBG_ERR,
1731                                "decode_loader_result got %d=%x !",
1732                                result, result);
1733                        break;
1734        }
1735
1736        PRINTK (KERN_ERR, "%s", msg);
1737        return res;
1738}
1739
1740static int __devinit do_loader_command (volatile loader_block * lb,
1741                                     const amb_dev * dev, loader_command cmd) {
1742  
1743  unsigned long timeout;
1744  
1745  PRINTD (DBG_FLOW|DBG_LOAD, "do_loader_command");
1746  
1747  /* do a command
1748     
1749     Set the return value to zero, set the command type and set the
1750     valid entry to the right magic value. The payload is already
1751     correctly byte-ordered so we leave it alone. Hit the doorbell
1752     with the bus address of this structure.
1753     
1754  */
1755  
1756  lb->result = 0;
1757  lb->command = cpu_to_be32 (cmd);
1758  lb->valid = cpu_to_be32 (DMA_VALID);
1759  // dump_registers (dev);
1760  // dump_loader_block (lb);
1761  wr_mem (dev, offsetof(amb_mem, doorbell), virt_to_bus (lb) & ~onegigmask);
1762  
1763  timeout = command_timeouts[cmd] * 10;
1764  
1765  while (!lb->result || lb->result == cpu_to_be32 (COMMAND_IN_PROGRESS))
1766    if (timeout) {
1767      timeout = msleep_interruptible(timeout);
1768    } else {
1769      PRINTD (DBG_LOAD|DBG_ERR, "command %d timed out", cmd);
1770      dump_registers (dev);
1771      dump_loader_block (lb);
1772      return -ETIMEDOUT;
1773    }
1774  
1775  if (cmd == adapter_start) {
1776    // wait for start command to acknowledge...
1777    timeout = 100;
1778    while (rd_plain (dev, offsetof(amb_mem, doorbell)))
1779      if (timeout) {
1780        timeout = msleep_interruptible(timeout);
1781      } else {
1782        PRINTD (DBG_LOAD|DBG_ERR, "start command did not clear doorbell, res=%08x",
1783                be32_to_cpu (lb->result));
1784        dump_registers (dev);
1785        return -ETIMEDOUT;
1786      }
1787    return 0;
1788  } else {
1789    return decode_loader_result (cmd, be32_to_cpu (lb->result));
1790  }
1791  
1792}
1793
1794/* loader: determine loader version */
1795
1796static int __devinit get_loader_version (loader_block * lb,
1797                                      const amb_dev * dev, u32 * version) {
1798  int res;
1799  
1800  PRINTD (DBG_FLOW|DBG_LOAD, "get_loader_version");
1801  
1802  res = do_loader_command (lb, dev, get_version_number);
1803  if (res)
1804    return res;
1805  if (version)
1806    *version = be32_to_cpu (lb->payload.version);
1807  return 0;
1808}
1809
1810/* loader: write memory data blocks */
1811
1812static int __devinit loader_write (loader_block* lb,
1813                                   const amb_dev *dev,
1814                                   const struct ihex_binrec *rec) {
1815  transfer_block * tb = &lb->payload.transfer;
1816  
1817  PRINTD (DBG_FLOW|DBG_LOAD, "loader_write");
1818
1819  tb->address = rec->addr;
1820  tb->count = cpu_to_be32(be16_to_cpu(rec->len) / 4);
1821  memcpy(tb->data, rec->data, be16_to_cpu(rec->len));
1822  return do_loader_command (lb, dev, write_adapter_memory);
1823}
1824
1825/* loader: verify memory data blocks */
1826
1827static int __devinit loader_verify (loader_block * lb,
1828                                    const amb_dev *dev,
1829                                    const struct ihex_binrec *rec) {
1830  transfer_block * tb = &lb->payload.transfer;
1831  int res;
1832  
1833  PRINTD (DBG_FLOW|DBG_LOAD, "loader_verify");
1834  
1835  tb->address = rec->addr;
1836  tb->count = cpu_to_be32(be16_to_cpu(rec->len) / 4);
1837  res = do_loader_command (lb, dev, read_adapter_memory);
1838  if (!res && memcmp(tb->data, rec->data, be16_to_cpu(rec->len)))
1839    res = -EINVAL;
1840  return res;
1841}
1842
1843/* loader: start microcode */
1844
1845static int __devinit loader_start (loader_block * lb,
1846                                const amb_dev * dev, u32 address) {
1847  PRINTD (DBG_FLOW|DBG_LOAD, "loader_start");
1848  
1849  lb->payload.start = cpu_to_be32 (address);
1850  return do_loader_command (lb, dev, adapter_start);
1851}
1852
1853/********** reset card **********/
1854
1855static inline void sf (const char * msg)
1856{
1857        PRINTK (KERN_ERR, "self-test failed: %s", msg);
1858}
1859
1860static int amb_reset (amb_dev * dev, int diags) {
1861  u32 word;
1862  
1863  PRINTD (DBG_FLOW|DBG_LOAD, "amb_reset");
1864  
1865  word = rd_plain (dev, offsetof(amb_mem, reset_control));
1866  // put card into reset state
1867  wr_plain (dev, offsetof(amb_mem, reset_control), word | AMB_RESET_BITS);
1868  // wait a short while
1869  udelay (10);
1870#if 1
1871  // put card into known good state
1872  wr_plain (dev, offsetof(amb_mem, interrupt_control), AMB_DOORBELL_BITS);
1873  // clear all interrupts just in case
1874  wr_plain (dev, offsetof(amb_mem, interrupt), -1);
1875#endif
1876  // clear self-test done flag
1877  wr_plain (dev, offsetof(amb_mem, mb.loader.ready), 0);
1878  // take card out of reset state
1879  wr_plain (dev, offsetof(amb_mem, reset_control), word &~ AMB_RESET_BITS);
1880  
1881  if (diags) { 
1882    unsigned long timeout;
1883    // 4.2 second wait
1884    msleep(4200);
1885    // half second time-out
1886    timeout = 500;
1887    while (!rd_plain (dev, offsetof(amb_mem, mb.loader.ready)))
1888      if (timeout) {
1889        timeout = msleep_interruptible(timeout);
1890      } else {
1891        PRINTD (DBG_LOAD|DBG_ERR, "reset timed out");
1892        return -ETIMEDOUT;
1893      }
1894    
1895    // get results of self-test
1896    // XXX double check byte-order
1897    word = rd_mem (dev, offsetof(amb_mem, mb.loader.result));
1898    if (word & SELF_TEST_FAILURE) {
1899      if (word & GPINT_TST_FAILURE)
1900        sf ("interrupt");
1901      if (word & SUNI_DATA_PATTERN_FAILURE)
1902        sf ("SUNI data pattern");
1903      if (word & SUNI_DATA_BITS_FAILURE)
1904        sf ("SUNI data bits");
1905      if (word & SUNI_UTOPIA_FAILURE)
1906        sf ("SUNI UTOPIA interface");
1907      if (word & SUNI_FIFO_FAILURE)
1908        sf ("SUNI cell buffer FIFO");
1909      if (word & SRAM_FAILURE)
1910        sf ("bad SRAM");
1911      // better return value?
1912      return -EIO;
1913    }
1914    
1915  }
1916  return 0;
1917}
1918
1919/********** transfer and start the microcode **********/
1920
1921static int __devinit ucode_init (loader_block * lb, amb_dev * dev) {
1922  const struct firmware *fw;
1923  unsigned long start_address;
1924  const struct ihex_binrec *rec;
1925  const char *errmsg = 0;
1926  int res;
1927
1928  res = request_ihex_firmware(&fw, "atmsar11.fw", &dev->pci_dev->dev);
1929  if (res) {
1930    PRINTK (KERN_ERR, "Cannot load microcode data");
1931    return res;
1932  }
1933
1934  /* First record contains just the start address */
1935  rec = (const struct ihex_binrec *)fw->data;
1936  if (be16_to_cpu(rec->len) != sizeof(__be32) || be32_to_cpu(rec->addr)) {
1937    errmsg = "no start record";
1938    goto fail;
1939  }
1940  start_address = be32_to_cpup((__be32 *)rec->data);
1941
1942  rec = ihex_next_binrec(rec);
1943
1944  PRINTD (DBG_FLOW|DBG_LOAD, "ucode_init");
1945
1946  while (rec) {
1947    PRINTD (DBG_LOAD, "starting region (%x, %u)", be32_to_cpu(rec->addr),
1948            be16_to_cpu(rec->len));
1949    if (be16_to_cpu(rec->len) > 4 * MAX_TRANSFER_DATA) {
1950            errmsg = "record too long";
1951            goto fail;
1952    }
1953    if (be16_to_cpu(rec->len) & 3) {
1954            errmsg = "odd number of bytes";
1955            goto fail;
1956    }
1957    res = loader_write(lb, dev, rec);
1958    if (res)
1959      break;
1960
1961    res = loader_verify(lb, dev, rec);
1962    if (res)
1963      break;
1964    rec = ihex_next_binrec(rec);
1965  }
1966  release_firmware(fw);
1967  if (!res)
1968    res = loader_start(lb, dev, start_address);
1969
1970  return res;
1971fail:
1972  release_firmware(fw);
1973  PRINTK(KERN_ERR, "Bad microcode data (%s)", errmsg);
1974  return -EINVAL;
1975}
1976
1977/********** give adapter parameters **********/
1978  
1979static inline __be32 bus_addr(void * addr) {
1980    return cpu_to_be32 (virt_to_bus (addr));
1981}
1982
1983static int __devinit amb_talk (amb_dev * dev) {
1984  adap_talk_block a;
1985  unsigned char pool;
1986  unsigned long timeout;
1987  
1988  PRINTD (DBG_FLOW, "amb_talk %p", dev);
1989  
1990  a.command_start = bus_addr (dev->cq.ptrs.start);
1991  a.command_end   = bus_addr (dev->cq.ptrs.limit);
1992  a.tx_start      = bus_addr (dev->txq.in.start);
1993  a.tx_end        = bus_addr (dev->txq.in.limit);
1994  a.txcom_start   = bus_addr (dev->txq.out.start);
1995  a.txcom_end     = bus_addr (dev->txq.out.limit);
1996  
1997  for (pool = 0; pool < NUM_RX_POOLS; ++pool) {
1998    // the other "a" items are set up by the adapter
1999    a.rec_struct[pool].buffer_start = bus_addr (dev->rxq[pool].in.start);
2000    a.rec_struct[pool].buffer_end   = bus_addr (dev->rxq[pool].in.limit);
2001    a.rec_struct[pool].rx_start     = bus_addr (dev->rxq[pool].out.start);
2002    a.rec_struct[pool].rx_end       = bus_addr (dev->rxq[pool].out.limit);
2003    a.rec_struct[pool].buffer_size = cpu_to_be32 (dev->rxq[pool].buffer_size);
2004  }
2005  
2006#ifdef AMB_NEW_MICROCODE
2007  // disable fast PLX prefetching
2008  a.init_flags = 0;
2009#endif
2010  
2011  // pass the structure
2012  wr_mem (dev, offsetof(amb_mem, doorbell), virt_to_bus (&a));
2013  
2014  // 2.2 second wait (must not touch doorbell during 2 second DMA test)
2015  msleep(2200);
2016  // give the adapter another half second?
2017  timeout = 500;
2018  while (rd_plain (dev, offsetof(amb_mem, doorbell)))
2019    if (timeout) {
2020      timeout = msleep_interruptible(timeout);
2021    } else {
2022      PRINTD (DBG_INIT|DBG_ERR, "adapter init timed out");
2023      return -ETIMEDOUT;
2024    }
2025  
2026  return 0;
2027}
2028
2029// get microcode version
2030static void __devinit amb_ucode_version (amb_dev * dev) {
2031  u32 major;
2032  u32 minor;
2033  command cmd;
2034  cmd.request = cpu_to_be32 (SRB_GET_VERSION);
2035  while (command_do (dev, &cmd)) {
2036    set_current_state(TASK_UNINTERRUPTIBLE);
2037    schedule();
2038  }
2039  major = be32_to_cpu (cmd.args.version.major);
2040  minor = be32_to_cpu (cmd.args.version.minor);
2041  PRINTK (KERN_INFO, "microcode version is %u.%u", major, minor);
2042}
2043  
2044// get end station address
2045static void __devinit amb_esi (amb_dev * dev, u8 * esi) {
2046  u32 lower4;
2047  u16 upper2;
2048  command cmd;
2049  
2050  cmd.request = cpu_to_be32 (SRB_GET_BIA);
2051  while (command_do (dev, &cmd)) {
2052    set_current_state(TASK_UNINTERRUPTIBLE);
2053    schedule();
2054  }
2055  lower4 = be32_to_cpu (cmd.args.bia.lower4);
2056  upper2 = be32_to_cpu (cmd.args.bia.upper2);
2057  PRINTD (DBG_LOAD, "BIA: lower4: %08x, upper2 %04x", lower4, upper2);
2058  
2059  if (esi) {
2060    unsigned int i;
2061    
2062    PRINTDB (DBG_INIT, "ESI:");
2063    for (i = 0; i < ESI_LEN; ++i) {
2064      if (i < 4)
2065          esi[i] = bitrev8(lower4>>(8*i));
2066      else
2067          esi[i] = bitrev8(upper2>>(8*(i-4)));
2068      PRINTDM (DBG_INIT, " %02x", esi[i]);
2069    }
2070    
2071    PRINTDE (DBG_INIT, "");
2072  }
2073  
2074  return;
2075}
2076  
2077static void fixup_plx_window (amb_dev *dev, loader_block *lb)
2078{
2079        // fix up the PLX-mapped window base address to match the block
2080        unsigned long blb;
2081        u32 mapreg;
2082        blb = virt_to_bus(lb);
2083        // the kernel stack had better not ever cross a 1Gb boundary!
2084        mapreg = rd_plain (dev, offsetof(amb_mem, stuff[10]));
2085        mapreg &= ~onegigmask;
2086        mapreg |= blb & onegigmask;
2087        wr_plain (dev, offsetof(amb_mem, stuff[10]), mapreg);
2088        return;
2089}
2090
2091static int __devinit amb_init (amb_dev * dev)
2092{
2093  loader_block lb;
2094  
2095  u32 version;
2096  
2097  if (amb_reset (dev, 1)) {
2098    PRINTK (KERN_ERR, "card reset failed!");
2099  } else {
2100    fixup_plx_window (dev, &lb);
2101    
2102    if (get_loader_version (&lb, dev, &version)) {
2103      PRINTK (KERN_INFO, "failed to get loader version");
2104    } else {
2105      PRINTK (KERN_INFO, "loader version is %08x", version);
2106      
2107      if (ucode_init (&lb, dev)) {
2108        PRINTK (KERN_ERR, "microcode failure");
2109      } else if (create_queues (dev, cmds, txs, rxs, rxs_bs)) {
2110        PRINTK (KERN_ERR, "failed to get memory for queues");
2111      } else {
2112        
2113        if (amb_talk (dev)) {
2114          PRINTK (KERN_ERR, "adapter did not accept queues");
2115        } else {
2116          
2117          amb_ucode_version (dev);
2118          return 0;
2119          
2120        } /* amb_talk */
2121        
2122        destroy_queues (dev);
2123      } /* create_queues, ucode_init */
2124      
2125      amb_reset (dev, 0);
2126    } /* get_loader_version */
2127    
2128  } /* amb_reset */
2129  
2130  return -EINVAL;
2131}
2132
2133static void setup_dev(amb_dev *dev, struct pci_dev *pci_dev) 
2134{
2135      unsigned char pool;
2136      
2137      // set up known dev items straight away
2138      dev->pci_dev = pci_dev; 
2139      pci_set_drvdata(pci_dev, dev);
2140      
2141      dev->iobase = pci_resource_start (pci_dev, 1);
2142      dev->irq = pci_dev->irq; 
2143      dev->membase = bus_to_virt(pci_resource_start(pci_dev, 0));
2144      
2145      // flags (currently only dead)
2146      dev->flags = 0;
2147      
2148      // Allocate cell rates (fibre)
2149      // ATM_OC3_PCR = 1555200000/8/270*260/53 - 29/53
2150      // to be really pedantic, this should be ATM_OC3c_PCR
2151      dev->tx_avail = ATM_OC3_PCR;
2152      dev->rx_avail = ATM_OC3_PCR;
2153      
2154      // semaphore for txer/rxer modifications - we cannot use a
2155      // spinlock as the critical region needs to switch processes
2156      mutex_init(&dev->vcc_sf);
2157      // queue manipulation spinlocks; we want atomic reads and
2158      // writes to the queue descriptors (handles IRQ and SMP)
2159      // consider replacing "int pending" -> "atomic_t available"
2160      // => problem related to who gets to move queue pointers
2161      spin_lock_init (&dev->cq.lock);
2162      spin_lock_init (&dev->txq.lock);
2163      for (pool = 0; pool < NUM_RX_POOLS; ++pool)
2164        spin_lock_init (&dev->rxq[pool].lock);
2165}
2166
2167static void setup_pci_dev(struct pci_dev *pci_dev)
2168{
2169        unsigned char lat;
2170      
2171        // enable bus master accesses
2172        pci_set_master(pci_dev);
2173
2174        // frobnicate latency (upwards, usually)
2175        pci_read_config_byte (pci_dev, PCI_LATENCY_TIMER, &lat);
2176
2177        if (!pci_lat)
2178                pci_lat = (lat < MIN_PCI_LATENCY) ? MIN_PCI_LATENCY : lat;
2179
2180        if (lat != pci_lat) {
2181                PRINTK (KERN_INFO, "Changing PCI latency timer from %hu to %hu",
2182                        lat, pci_lat);
2183                pci_write_config_byte(pci_dev, PCI_LATENCY_TIMER, pci_lat);
2184        }
2185}
2186
2187static int __devinit amb_probe(struct pci_dev *pci_dev, const struct pci_device_id *pci_ent)
2188{
2189        amb_dev * dev;
2190        int err;
2191        unsigned int irq;
2192      
2193        err = pci_enable_device(pci_dev);
2194        if (err < 0) {
2195                PRINTK (KERN_ERR, "skipped broken (PLX rev 2) card");
2196                goto out;
2197        }
2198
2199        // read resources from PCI configuration space
2200        irq = pci_dev->irq;
2201
2202        if (pci_dev->device == PCI_DEVICE_ID_MADGE_AMBASSADOR_BAD) {
2203                PRINTK (KERN_ERR, "skipped broken (PLX rev 2) card");
2204                err = -EINVAL;
2205                goto out_disable;
2206        }
2207
2208        PRINTD (DBG_INFO, "found Madge ATM adapter (amb) at"
2209                " IO %llx, IRQ %u, MEM %p",
2210                (unsigned long long)pci_resource_start(pci_dev, 1),
2211                irq, bus_to_virt(pci_resource_start(pci_dev, 0)));
2212
2213        // check IO region
2214        err = pci_request_region(pci_dev, 1, DEV_LABEL);
2215        if (err < 0) {
2216                PRINTK (KERN_ERR, "IO range already in use!");
2217                goto out_disable;
2218        }
2219
2220        dev = kzalloc(sizeof(amb_dev), GFP_KERNEL);
2221        if (!dev) {
2222                PRINTK (KERN_ERR, "out of memory!");
2223                err = -ENOMEM;
2224                goto out_release;
2225        }
2226
2227        setup_dev(dev, pci_dev);
2228
2229        err = amb_init(dev);
2230        if (err < 0) {
2231                PRINTK (KERN_ERR, "adapter initialisation failure");
2232                goto out_free;
2233        }
2234
2235        setup_pci_dev(pci_dev);
2236
2237        // grab (but share) IRQ and install handler
2238        err = request_irq(irq, interrupt_handler, IRQF_SHARED, DEV_LABEL, dev);
2239        if (err < 0) {
2240                PRINTK (KERN_ERR, "request IRQ failed!");
2241                goto out_reset;
2242        }
2243
2244        dev->atm_dev = atm_dev_register (DEV_LABEL, &pci_dev->dev, &amb_ops, -1,
2245                                         NULL);
2246        if (!dev->atm_dev) {
2247                PRINTD (DBG_ERR, "failed to register Madge ATM adapter");
2248                err = -EINVAL;
2249                goto out_free_irq;
2250        }
2251
2252        PRINTD (DBG_INFO, "registered Madge ATM adapter (no. %d) (%p) at %p",
2253                dev->atm_dev->number, dev, dev->atm_dev);
2254                dev->atm_dev->dev_data = (void *) dev;
2255
2256        // register our address
2257        amb_esi (dev, dev->atm_dev->esi);
2258
2259        // 0 bits for vpi, 10 bits for vci
2260        dev->atm_dev->ci_range.vpi_bits = NUM_VPI_BITS;
2261        dev->atm_dev->ci_range.vci_bits = NUM_VCI_BITS;
2262
2263        init_timer(&dev->housekeeping);
2264        dev->housekeeping.function = do_housekeeping;
2265        dev->housekeeping.data = (unsigned long) dev;
2266        mod_timer(&dev->housekeeping, jiffies);
2267
2268        // enable host interrupts
2269        interrupts_on (dev);
2270
2271out:
2272        return err;
2273
2274out_free_irq:
2275        free_irq(irq, dev);
2276out_reset:
2277        amb_reset(dev, 0);
2278out_free:
2279        kfree(dev);
2280out_release:
2281        pci_release_region(pci_dev, 1);
2282out_disable:
2283        pci_disable_device(pci_dev);
2284        goto out;
2285}
2286
2287
2288static void __devexit amb_remove_one(struct pci_dev *pci_dev)
2289{
2290        struct amb_dev *dev;
2291
2292        dev = pci_get_drvdata(pci_dev);
2293
2294        PRINTD(DBG_INFO|DBG_INIT, "closing %p (atm_dev = %p)", dev, dev->atm_dev);
2295        del_timer_sync(&dev->housekeeping);
2296        // the drain should not be necessary
2297        drain_rx_pools(dev);
2298        interrupts_off(dev);
2299        amb_reset(dev, 0);
2300        free_irq(dev->irq, dev);
2301        pci_disable_device(pci_dev);
2302        destroy_queues(dev);
2303        atm_dev_deregister(dev->atm_dev);
2304        kfree(dev);
2305        pci_release_region(pci_dev, 1);
2306}
2307
2308static void __init amb_check_args (void) {
2309  unsigned char pool;
2310  unsigned int max_rx_size;
2311  
2312#ifdef DEBUG_AMBASSADOR
2313  PRINTK (KERN_NOTICE, "debug bitmap is %hx", debug &= DBG_MASK);
2314#else
2315  if (debug)
2316    PRINTK (KERN_NOTICE, "no debugging support");
2317#endif
2318  
2319  if (cmds < MIN_QUEUE_SIZE)
2320    PRINTK (KERN_NOTICE, "cmds has been raised to %u",
2321            cmds = MIN_QUEUE_SIZE);
2322  
2323  if (txs < MIN_QUEUE_SIZE)
2324    PRINTK (KERN_NOTICE, "txs has been raised to %u",
2325            txs = MIN_QUEUE_SIZE);
2326  
2327  for (pool = 0; pool < NUM_RX_POOLS; ++pool)
2328    if (rxs[pool] < MIN_QUEUE_SIZE)
2329      PRINTK (KERN_NOTICE, "rxs[%hu] has been raised to %u",
2330              pool, rxs[pool] = MIN_QUEUE_SIZE);
2331  
2332  // buffers sizes should be greater than zero and strictly increasing
2333  max_rx_size = 0;
2334  for (pool = 0; pool < NUM_RX_POOLS; ++pool)
2335    if (rxs_bs[pool] <= max_rx_size)
2336      PRINTK (KERN_NOTICE, "useless pool (rxs_bs[%hu] = %u)",
2337              pool, rxs_bs[pool]);
2338    else
2339      max_rx_size = rxs_bs[pool];
2340  
2341  if (rx_lats < MIN_RX_BUFFERS)
2342    PRINTK (KERN_NOTICE, "rx_lats has been raised to %u",
2343            rx_lats = MIN_RX_BUFFERS);
2344  
2345  return;
2346}
2347
2348/********** module stuff **********/
2349
2350MODULE_AUTHOR(maintainer_string);
2351MODULE_DESCRIPTION(description_string);
2352MODULE_LICENSE("GPL");
2353MODULE_FIRMWARE("atmsar11.fw");
2354module_param(debug,   ushort, 0644);
2355module_param(cmds,    uint, 0);
2356module_param(txs,     uint, 0);
2357module_param_array(rxs,     uint, NULL, 0);
2358module_param_array(rxs_bs,  uint, NULL, 0);
2359module_param(rx_lats, uint, 0);
2360module_param(pci_lat, byte, 0);
2361MODULE_PARM_DESC(debug,   "debug bitmap, see .h file");
2362MODULE_PARM_DESC(cmds,    "number of command queue entries");
2363MODULE_PARM_DESC(txs,     "number of TX queue entries");
2364MODULE_PARM_DESC(rxs,     "number of RX queue entries [" __MODULE_STRING(NUM_RX_POOLS) "]");
2365MODULE_PARM_DESC(rxs_bs,  "size of RX buffers [" __MODULE_STRING(NUM_RX_POOLS) "]");
2366MODULE_PARM_DESC(rx_lats, "number of extra buffers to cope with RX latencies");
2367MODULE_PARM_DESC(pci_lat, "PCI latency in bus cycles");
2368
2369/********** module entry **********/
2370
2371static struct pci_device_id amb_pci_tbl[] = {
2372        { PCI_VDEVICE(MADGE, PCI_DEVICE_ID_MADGE_AMBASSADOR), 0 },
2373        { PCI_VDEVICE(MADGE, PCI_DEVICE_ID_MADGE_AMBASSADOR_BAD), 0 },
2374        { 0, }
2375};
2376
2377MODULE_DEVICE_TABLE(pci, amb_pci_tbl);
2378
2379static struct pci_driver amb_driver = {
2380        .name =         "amb",
2381        .probe =        amb_probe,
2382        .remove =       __devexit_p(amb_remove_one),
2383        .id_table =     amb_pci_tbl,
2384};
2385
2386static int __init amb_module_init (void)
2387{
2388  PRINTD (DBG_FLOW|DBG_INIT, "init_module");
2389  
2390  // sanity check - cast needed as printk does not support %Zu
2391  if (sizeof(amb_mem) != 4*16 + 4*12) {
2392    PRINTK (KERN_ERR, "Fix amb_mem (is %lu words).",
2393            (unsigned long) sizeof(amb_mem));
2394    return -ENOMEM;
2395  }
2396  
2397  show_version();
2398  
2399  amb_check_args();
2400  
2401  // get the juice
2402  return pci_register_driver(&amb_driver);
2403}
2404
2405/********** module exit **********/
2406
2407static void __exit amb_module_exit (void)
2408{
2409  PRINTD (DBG_FLOW|DBG_INIT, "cleanup_module");
2410
2411  pci_unregister_driver(&amb_driver);
2412}
2413
2414module_init(amb_module_init);
2415module_exit(amb_module_exit);
2416