linux/drivers/atm/ambassador.c
   1/*
   2  Madge Ambassador ATM Adapter driver.
   3  Copyright (C) 1995-1999  Madge Networks Ltd.
   4
   5  This program is free software; you can redistribute it and/or modify
   6  it under the terms of the GNU General Public License as published by
   7  the Free Software Foundation; either version 2 of the License, or
   8  (at your option) any later version.
   9
  10  This program is distributed in the hope that it will be useful,
  11  but WITHOUT ANY WARRANTY; without even the implied warranty of
  12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13  GNU General Public License for more details.
  14
  15  You should have received a copy of the GNU General Public License
  16  along with this program; if not, write to the Free Software
  17  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  18
  19  The GNU GPL is contained in /usr/doc/copyright/GPL on a Debian
  20  system and in the file COPYING in the Linux kernel source.
  21*/
  22
  23/* * dedicated to the memory of Graham Gordon 1971-1998 * */
  24
  25#include <linux/module.h>
  26#include <linux/types.h>
  27#include <linux/pci.h>
  28#include <linux/kernel.h>
  29#include <linux/init.h>
  30#include <linux/ioport.h>
  31#include <linux/atmdev.h>
  32#include <linux/delay.h>
  33#include <linux/interrupt.h>
  34#include <linux/poison.h>
  35#include <linux/bitrev.h>
  36#include <linux/mutex.h>
  37#include <linux/firmware.h>
  38#include <linux/ihex.h>
  39#include <linux/slab.h>
  40
  41#include <asm/atomic.h>
  42#include <asm/io.h>
  43#include <asm/byteorder.h>
  44
  45#include "ambassador.h"
  46
  47#define maintainer_string "Giuliano Procida at Madge Networks <gprocida@madge.com>"
  48#define description_string "Madge ATM Ambassador driver"
  49#define version_string "1.2.4"
  50
  51static inline void __init show_version (void) {
  52  printk ("%s version %s\n", description_string, version_string);
  53}
  54
  55/*
  56  
  57  Theory of Operation
  58  
  59  I Hardware, detection, initialisation and shutdown.
  60  
  61  1. Supported Hardware
  62  
  63  This driver is for the PCI ATMizer-based Ambassador card (except
  64  very early versions). It is not suitable for the similar EISA "TR7"
  65  card. Commercially, both cards are known as Collage Server ATM
  66  adapters.
  67  
   68  The loader supports image transfer to the card, image start and a few
  69  other miscellaneous commands.
  70  
  71  Only AAL5 is supported with vpi = 0 and vci in the range 0 to 1023.
  72  
  73  The cards are big-endian.
  74  
  75  2. Detection
  76  
  77  Standard PCI stuff, the early cards are detected and rejected.
  78  
  79  3. Initialisation
  80  
  81  The cards are reset and the self-test results are checked. The
  82  microcode image is then transferred and started. This waits for a
  83  pointer to a descriptor containing details of the host-based queues
  84  and buffers and various parameters etc. Once they are processed
  85  normal operations may begin. The BIA is read using a microcode
  86  command.
  87  
  88  4. Shutdown
  89  
  90  This may be accomplished either by a card reset or via the microcode
  91  shutdown command. Further investigation required.
  92  
  93  5. Persistent state
  94  
  95  The card reset does not affect PCI configuration (good) or the
  96  contents of several other "shared run-time registers" (bad) which
  97  include doorbell and interrupt control as well as EEPROM and PCI
  98  control. The driver must be careful when modifying these registers
  99  not to touch bits it does not use and to undo any changes at exit.
 100  
 101  II Driver software
 102  
 103  0. Generalities
 104  
 105  The adapter is quite intelligent (fast) and has a simple interface
 106  (few features). VPI is always zero, 1024 VCIs are supported. There
 107  is limited cell rate support. UBR channels can be capped and ABR
 108  (explicit rate, but not EFCI) is supported. There is no CBR or VBR
 109  support.
 110  
 111  1. Driver <-> Adapter Communication
 112  
 113  Apart from the basic loader commands, the driver communicates
 114  through three entities: the command queue (CQ), the transmit queue
 115  pair (TXQ) and the receive queue pairs (RXQ). These three entities
 116  are set up by the host and passed to the microcode just after it has
 117  been started.
 118  
 119  All queues are host-based circular queues. They are contiguous and
 120  (due to hardware limitations) have some restrictions as to their
 121  locations in (bus) memory. They are of the "full means the same as
 122  empty so don't do that" variety since the adapter uses pointers
 123  internally.
 124  
 125  The queue pairs work as follows: one queue is for supply to the
 126  adapter, items in it are pending and are owned by the adapter; the
 127  other is the queue for return from the adapter, items in it have
 128  been dealt with by the adapter. The host adds items to the supply
 129  (TX descriptors and free RX buffer descriptors) and removes items
 130  from the return (TX and RX completions). The adapter deals with out
 131  of order completions.
 132  
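  As a minimal sketch (not the driver's actual structures; tx_give and
  tx_take below are the real thing), giving one entry to a supply queue
  of N slots looks like this; the queue is never allowed to hold more
  than N-1 entries, so "in == out" can only ever mean empty:

    if (q->pending < q->maximum) {                  // maximum is N-1
      *q->in = entry;                               // adapter owns it now
      q->pending++;
      q->in = NEXTQ (q->in, q->start, q->limit);    // wrap at the end
      // tell the adapter the new "in" pointer and ring the doorbell
    }
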
 133  Interrupts (card to host) and the doorbell (host to card) are used
 134  for signalling.
 135  
 136  1. CQ
 137  
 138  This is to communicate "open VC", "close VC", "get stats" etc. to
 139  the adapter. At most one command is retired every millisecond by the
 140  card. There is no out of order completion or notification. The
 141  driver needs to check the return code of the command, waiting as
 142  appropriate.
 143  
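  In practice the usual pattern (see amb_open and amb_close below) is:

    command cmd;
    cmd.request = cpu_to_be32 (SRB_CLOSE_VC);
    cmd.args.close.vc = cpu_to_be32 (vci);        // vpi is always 0
    while (command_do (dev, &cmd))                // -EAGAIN: CQ full
      schedule();
    // command_do returns with the adapter's reply copied back into cmd
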
 144  2. TXQ
 145  
 146  TX supply items are of variable length (scatter gather support) and
 147  so the queue items are (more or less) pointers to the real thing.
 148  Each TX supply item contains a unique, host-supplied handle (the skb
 149  bus address seems most sensible as this works for Alphas as well,
 150  there is no need to do any endian conversions on the handles).
 151  
 152  TX return items consist of just the handles above.
 153  
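  In the code below the handle is in fact the bus address of the small
  tx_simple descriptor, which in turn carries the skb pointer:

    tx_descr->tx_frag_end.handle = virt_to_bus (tx_descr);   // amb_send
    ...
    tx_simple * tx_descr = bus_to_virt (tx->handle);         // tx_complete
    struct sk_buff * skb = tx_descr->skb;
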
 154  3. RXQ (up to 4 of these with different lengths and buffer sizes)
 155  
 156  RX supply items consist of a unique, host-supplied handle (the skb
 157  bus address again) and a pointer to the buffer data area.
 158  
 159  RX return items consist of the handle above, the VC, length and a
 160  status word. This just screams "oh so easy" doesn't it?
 161
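  The handle round trip, as done by fill_rx_pool and rx_complete below:

    rx.handle = virt_to_bus (skb);                            // supply side
    rx.host_address = cpu_to_be32 (virt_to_bus (skb->data));
    ...
    struct sk_buff * skb = bus_to_virt (rx->handle);          // return side
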
 162  Note on RX pool sizes:
 163   
 164  Each pool should have enough buffers to handle a back-to-back stream
 165  of minimum sized frames on a single VC. For example:
 166  
 167    frame spacing = 3us (about right)
 168    
 169    delay = IRQ lat + RX handling + RX buffer replenish = 20 (us)  (a guess)
 170    
 171    min number of buffers for one VC = 1 + delay/spacing (buffers)
 172
 173    delay/spacing = latency = (20+2)/3 = 7 (buffers)  (rounding up)
 174    
 175  The 20us delay assumes that there is no need to sleep; if we need to
 176  sleep to get buffers we are going to drop frames anyway.
 177  
 178  In fact, each pool should have enough buffers to support the
 179  simultaneous reassembly of a separate frame on each VC and cope with
 180  the case in which frames complete in round robin cell fashion on
 181  each VC.
 182  
 183  Only one frame can complete at each cell arrival, so if "n" VCs are
 184  open, the worst case is to have them all complete frames together
 185  followed by all starting new frames together.
 186  
 187    desired number of buffers = n + delay/spacing
 188    
  189  These are the extreme requirements; however, they are "n+k" for some
 190  "k" so we have only the constant to choose. This is the argument
  191  rx_lats, which currently defaults to 7.
 192  
 193  Actually, "n ? n+k : 0" is better and this is what is implemented,
 194  subject to the limit given by the pool size.
 195  
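  For example, with the default rx_lats of 7 and the default pool size
  of 64 buffer slots:

    n = 16 open VCs  ->  buffers_wanted = 16 + 7 = 23
    n =  0 open VCs  ->  buffers_wanted = 0
    n = 60 open VCs  ->  60 + 7 = 67, but filling is capped by the queue
                         (at most 63 of the 64 slots may be pending)
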
 196  4. Driver locking
 197  
 198  Simple spinlocks are used around the TX and RX queue mechanisms.
 199  Anyone with a faster, working method is welcome to implement it.
 200  
 201  The adapter command queue is protected with a spinlock. We always
 202  wait for commands to complete.
 203  
 204  A more complex form of locking is used around parts of the VC open
 205  and close functions. There are three reasons for a lock: 1. we need
 206  to do atomic rate reservation and release (not used yet), 2. Opening
 207  sometimes involves two adapter commands which must not be separated
 208  by another command on the same VC, 3. the changes to RX pool size
 209  must be atomic. The lock needs to work over context switches, so we
  210  use a mutex (dev->vcc_sf).
 211  
 212  III Hardware Features and Microcode Bugs
 213  
 214  1. Byte Ordering
 215  
 216  *%^"$&%^$*&^"$(%^$#&^%$(&#%$*(&^#%!"!"!*!
 217  
 218  2. Memory access
 219  
 220  All structures that are not accessed using DMA must be 4-byte
 221  aligned (not a problem) and must not cross 4MB boundaries.
 222  
 223  There is a DMA memory hole at E0000000-E00000FF (groan).
 224  
 225  TX fragments (DMA read) must not cross 4MB boundaries (would be 16MB
 226  but for a hardware bug).
 227  
 228  RX buffers (DMA write) must not cross 16MB boundaries and must
 229  include spare trailing bytes up to the next 4-byte boundary; they
 230  will be written with rubbish.
 231  
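  Concretely, check_area() below rejects an area whose first and last
  byte addresses differ above bit 21, or which starts in the DMA hole:

    start = 0x003FFF00, length = 0x200  =>  last = 0x004000FF
    (start ^ last) & (-1 << 22) = 0x00400000  => non-zero, crosses 4MB: reject
    a start of 0xE00000xx would fail the second (DMA hole) test instead
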
 232  The PLX likes to prefetch; if reading up to 4 u32 past the end of
 233  each TX fragment is not a problem, then TX can be made to go a
 234  little faster by passing a flag at init that disables a prefetch
 235  workaround. We do not pass this flag. (new microcode only)
 236  
 237  Now we:
  238  . Note that alloc_skb rounds up size to a 16-byte boundary.
 239  . Ensure all areas do not traverse 4MB boundaries.
  240  . Ensure all areas do not start at an E00000xx bus address.
 241  (I cannot be certain, but this may always hold with Linux)
 242  . Make all failures cause a loud message.
 243  . Discard non-conforming SKBs (causes TX failure or RX fill delay).
 244  . Discard non-conforming TX fragment descriptors (the TX fails).
 245  In the future we could:
 246  . Allow RX areas that traverse 4MB (but not 16MB) boundaries.
 247  . Segment TX areas into some/more fragments, when necessary.
 248  . Relax checks for non-DMA items (ignore hole).
 249  . Give scatter-gather (iovec) requirements using ???. (?)
 250  
 251  3. VC close is broken (only for new microcode)
 252  
 253  The VC close adapter microcode command fails to do anything if any
 254  frames have been received on the VC but none have been transmitted.
 255  Frames continue to be reassembled and passed (with IRQ) to the
 256  driver.
 257  
 258  IV To Do List
 259  
 260  . Fix bugs!
 261  
 262  . Timer code may be broken.
 263  
 264  . Deal with buggy VC close (somehow) in microcode 12.
 265  
 266  . Handle interrupted and/or non-blocking writes - is this a job for
 267    the protocol layer?
 268  
 269  . Add code to break up TX fragments when they span 4MB boundaries.
 270  
 271  . Add SUNI phy layer (need to know where SUNI lives on card).
 272  
 273  . Implement a tx_alloc fn to (a) satisfy TX alignment etc. and (b)
 274    leave extra headroom space for Ambassador TX descriptors.
 275  
 276  . Understand these elements of struct atm_vcc: recvq (proto?),
 277    sleep, callback, listenq, backlog_quota, reply and user_back.
 278  
 279  . Adjust TX/RX skb allocation to favour IP with LANE/CLIP (configurable).
 280  
 281  . Impose a TX-pending limit (2?) on each VC, help avoid TX q overflow.
 282  
 283  . Decide whether RX buffer recycling is or can be made completely safe;
 284    turn it back on. It looks like Werner is going to axe this.
 285  
 286  . Implement QoS changes on open VCs (involves extracting parts of VC open
 287    and close into separate functions and using them to make changes).
 288  
 289  . Hack on command queue so that someone can issue multiple commands and wait
 290    on the last one (OR only "no-op" or "wait" commands are waited for).
 291  
  292  . Eliminate need for while-schedule around command_do.
 293  
 294*/
 295
 296static void do_housekeeping (unsigned long arg);
 297/********** globals **********/
 298
 299static unsigned short debug = 0;
 300static unsigned int cmds = 8;
 301static unsigned int txs = 32;
 302static unsigned int rxs[NUM_RX_POOLS] = { 64, 64, 64, 64 };
 303static unsigned int rxs_bs[NUM_RX_POOLS] = { 4080, 12240, 36720, 65535 };
 304static unsigned int rx_lats = 7;
 305static unsigned char pci_lat = 0;
 306
 307static const unsigned long onegigmask = -1 << 30;
 308
 309/********** access to adapter **********/
 310
 311static inline void wr_plain (const amb_dev * dev, size_t addr, u32 data) {
 312  PRINTD (DBG_FLOW|DBG_REGS, "wr: %08zx <- %08x", addr, data);
 313#ifdef AMB_MMIO
 314  dev->membase[addr / sizeof(u32)] = data;
 315#else
 316  outl (data, dev->iobase + addr);
 317#endif
 318}
 319
 320static inline u32 rd_plain (const amb_dev * dev, size_t addr) {
 321#ifdef AMB_MMIO
 322  u32 data = dev->membase[addr / sizeof(u32)];
 323#else
 324  u32 data = inl (dev->iobase + addr);
 325#endif
 326  PRINTD (DBG_FLOW|DBG_REGS, "rd: %08zx -> %08x", addr, data);
 327  return data;
 328}
 329
 330static inline void wr_mem (const amb_dev * dev, size_t addr, u32 data) {
 331  __be32 be = cpu_to_be32 (data);
 332  PRINTD (DBG_FLOW|DBG_REGS, "wr: %08zx <- %08x b[%08x]", addr, data, be);
 333#ifdef AMB_MMIO
 334  dev->membase[addr / sizeof(u32)] = be;
 335#else
 336  outl (be, dev->iobase + addr);
 337#endif
 338}
 339
 340static inline u32 rd_mem (const amb_dev * dev, size_t addr) {
 341#ifdef AMB_MMIO
 342  __be32 be = dev->membase[addr / sizeof(u32)];
 343#else
 344  __be32 be = inl (dev->iobase + addr);
 345#endif
 346  u32 data = be32_to_cpu (be);
 347  PRINTD (DBG_FLOW|DBG_REGS, "rd: %08zx -> %08x b[%08x]", addr, data, be);
 348  return data;
 349}
 350
 351/********** dump routines **********/
 352
 353static inline void dump_registers (const amb_dev * dev) {
 354#ifdef DEBUG_AMBASSADOR
 355  if (debug & DBG_REGS) {
 356    size_t i;
 357    PRINTD (DBG_REGS, "reading PLX control: ");
 358    for (i = 0x00; i < 0x30; i += sizeof(u32))
 359      rd_mem (dev, i);
 360    PRINTD (DBG_REGS, "reading mailboxes: ");
 361    for (i = 0x40; i < 0x60; i += sizeof(u32))
 362      rd_mem (dev, i);
 363    PRINTD (DBG_REGS, "reading doorb irqev irqen reset:");
 364    for (i = 0x60; i < 0x70; i += sizeof(u32))
 365      rd_mem (dev, i);
 366  }
 367#else
 368  (void) dev;
 369#endif
 370  return;
 371}
 372
 373static inline void dump_loader_block (volatile loader_block * lb) {
 374#ifdef DEBUG_AMBASSADOR
 375  unsigned int i;
 376  PRINTDB (DBG_LOAD, "lb @ %p; res: %d, cmd: %d, pay:",
 377           lb, be32_to_cpu (lb->result), be32_to_cpu (lb->command));
 378  for (i = 0; i < MAX_COMMAND_DATA; ++i)
 379    PRINTDM (DBG_LOAD, " %08x", be32_to_cpu (lb->payload.data[i]));
 380  PRINTDE (DBG_LOAD, ", vld: %08x", be32_to_cpu (lb->valid));
 381#else
 382  (void) lb;
 383#endif
 384  return;
 385}
 386
 387static inline void dump_command (command * cmd) {
 388#ifdef DEBUG_AMBASSADOR
 389  unsigned int i;
 390  PRINTDB (DBG_CMD, "cmd @ %p, req: %08x, pars:",
 391           cmd, /*be32_to_cpu*/ (cmd->request));
 392  for (i = 0; i < 3; ++i)
 393    PRINTDM (DBG_CMD, " %08x", /*be32_to_cpu*/ (cmd->args.par[i]));
 394  PRINTDE (DBG_CMD, "");
 395#else
 396  (void) cmd;
 397#endif
 398  return;
 399}
 400
 401static inline void dump_skb (char * prefix, unsigned int vc, struct sk_buff * skb) {
 402#ifdef DEBUG_AMBASSADOR
 403  unsigned int i;
 404  unsigned char * data = skb->data;
 405  PRINTDB (DBG_DATA, "%s(%u) ", prefix, vc);
 406  for (i=0; i<skb->len && i < 256;i++)
 407    PRINTDM (DBG_DATA, "%02x ", data[i]);
 408  PRINTDE (DBG_DATA,"");
 409#else
 410  (void) prefix;
 411  (void) vc;
 412  (void) skb;
 413#endif
 414  return;
 415}
 416
 417/********** check memory areas for use by Ambassador **********/
 418
 419/* see limitations under Hardware Features */
 420
 421static int check_area (void * start, size_t length) {
 422  // assumes length > 0
 423  const u32 fourmegmask = -1 << 22;
 424  const u32 twofivesixmask = -1 << 8;
 425  const u32 starthole = 0xE0000000;
 426  u32 startaddress = virt_to_bus (start);
 427  u32 lastaddress = startaddress+length-1;
 428  if ((startaddress ^ lastaddress) & fourmegmask ||
 429      (startaddress & twofivesixmask) == starthole) {
 430    PRINTK (KERN_ERR, "check_area failure: [%x,%x] - mail maintainer!",
 431            startaddress, lastaddress);
 432    return -1;
 433  } else {
 434    return 0;
 435  }
 436}
 437
 438/********** free an skb (as per ATM device driver documentation) **********/
 439
 440static void amb_kfree_skb (struct sk_buff * skb) {
 441  if (ATM_SKB(skb)->vcc->pop) {
 442    ATM_SKB(skb)->vcc->pop (ATM_SKB(skb)->vcc, skb);
 443  } else {
 444    dev_kfree_skb_any (skb);
 445  }
 446}
 447
 448/********** TX completion **********/
 449
 450static void tx_complete (amb_dev * dev, tx_out * tx) {
 451  tx_simple * tx_descr = bus_to_virt (tx->handle);
 452  struct sk_buff * skb = tx_descr->skb;
 453  
 454  PRINTD (DBG_FLOW|DBG_TX, "tx_complete %p %p", dev, tx);
 455  
 456  // VC layer stats
 457  atomic_inc(&ATM_SKB(skb)->vcc->stats->tx);
 458  
 459  // free the descriptor
 460  kfree (tx_descr);
 461  
 462  // free the skb
 463  amb_kfree_skb (skb);
 464  
 465  dev->stats.tx_ok++;
 466  return;
 467}
 468
 469/********** RX completion **********/
 470
 471static void rx_complete (amb_dev * dev, rx_out * rx) {
 472  struct sk_buff * skb = bus_to_virt (rx->handle);
 473  u16 vc = be16_to_cpu (rx->vc);
 474  // unused: u16 lec_id = be16_to_cpu (rx->lec_id);
 475  u16 status = be16_to_cpu (rx->status);
 476  u16 rx_len = be16_to_cpu (rx->length);
 477  
 478  PRINTD (DBG_FLOW|DBG_RX, "rx_complete %p %p (len=%hu)", dev, rx, rx_len);
 479  
 480  // XXX move this in and add to VC stats ???
 481  if (!status) {
 482    struct atm_vcc * atm_vcc = dev->rxer[vc];
 483    dev->stats.rx.ok++;
 484    
 485    if (atm_vcc) {
 486      
 487      if (rx_len <= atm_vcc->qos.rxtp.max_sdu) {
 488        
 489        if (atm_charge (atm_vcc, skb->truesize)) {
 490          
 491          // prepare socket buffer
 492          ATM_SKB(skb)->vcc = atm_vcc;
 493          skb_put (skb, rx_len);
 494          
 495          dump_skb ("<<<", vc, skb);
 496          
 497          // VC layer stats
 498          atomic_inc(&atm_vcc->stats->rx);
 499          __net_timestamp(skb);
  500          // end of our responsibility
 501          atm_vcc->push (atm_vcc, skb);
 502          return;
 503          
 504        } else {
 505          // someone fix this (message), please!
 506          PRINTD (DBG_INFO|DBG_RX, "dropped thanks to atm_charge (vc %hu, truesize %u)", vc, skb->truesize);
 507          // drop stats incremented in atm_charge
 508        }
 509        
 510      } else {
 511        PRINTK (KERN_INFO, "dropped over-size frame");
 512        // should we count this?
 513        atomic_inc(&atm_vcc->stats->rx_drop);
 514      }
 515      
 516    } else {
 517      PRINTD (DBG_WARN|DBG_RX, "got frame but RX closed for channel %hu", vc);
 518      // this is an adapter bug, only in new version of microcode
 519    }
 520    
 521  } else {
 522    dev->stats.rx.error++;
 523    if (status & CRC_ERR)
 524      dev->stats.rx.badcrc++;
 525    if (status & LEN_ERR)
 526      dev->stats.rx.toolong++;
 527    if (status & ABORT_ERR)
 528      dev->stats.rx.aborted++;
 529    if (status & UNUSED_ERR)
 530      dev->stats.rx.unused++;
 531  }
 532  
 533  dev_kfree_skb_any (skb);
 534  return;
 535}
 536
 537/*
 538  
 539  Note on queue handling.
 540  
 541  Here "give" and "take" refer to queue entries and a queue (pair)
 542  rather than frames to or from the host or adapter. Empty frame
 543  buffers are given to the RX queue pair and returned unused or
 544  containing RX frames. TX frames (well, pointers to TX fragment
 545  lists) are given to the TX queue pair, completions are returned.
 546  
 547*/
 548
 549/********** command queue **********/
 550
 551// I really don't like this, but it's the best I can do at the moment
 552
 553// also, the callers are responsible for byte order as the microcode
 554// sometimes does 16-bit accesses (yuk yuk yuk)
 555
 556static int command_do (amb_dev * dev, command * cmd) {
 557  amb_cq * cq = &dev->cq;
 558  volatile amb_cq_ptrs * ptrs = &cq->ptrs;
 559  command * my_slot;
 560  
 561  PRINTD (DBG_FLOW|DBG_CMD, "command_do %p", dev);
 562  
 563  if (test_bit (dead, &dev->flags))
 564    return 0;
 565  
 566  spin_lock (&cq->lock);
 567  
 568  // if not full...
 569  if (cq->pending < cq->maximum) {
 570    // remember my slot for later
 571    my_slot = ptrs->in;
 572    PRINTD (DBG_CMD, "command in slot %p", my_slot);
 573    
 574    dump_command (cmd);
 575    
 576    // copy command in
 577    *ptrs->in = *cmd;
 578    cq->pending++;
 579    ptrs->in = NEXTQ (ptrs->in, ptrs->start, ptrs->limit);
 580    
 581    // mail the command
 582    wr_mem (dev, offsetof(amb_mem, mb.adapter.cmd_address), virt_to_bus (ptrs->in));
 583    
 584    if (cq->pending > cq->high)
 585      cq->high = cq->pending;
 586    spin_unlock (&cq->lock);
 587    
 588    // these comments were in a while-loop before, msleep removes the loop
 589    // go to sleep
 590    // PRINTD (DBG_CMD, "wait: sleeping %lu for command", timeout);
 591    msleep(cq->pending);
 592    
 593    // wait for my slot to be reached (all waiters are here or above, until...)
 594    while (ptrs->out != my_slot) {
 595      PRINTD (DBG_CMD, "wait: command slot (now at %p)", ptrs->out);
 596      set_current_state(TASK_UNINTERRUPTIBLE);
 597      schedule();
 598    }
 599    
 600    // wait on my slot (... one gets to its slot, and... )
 601    while (ptrs->out->request != cpu_to_be32 (SRB_COMPLETE)) {
 602      PRINTD (DBG_CMD, "wait: command slot completion");
 603      set_current_state(TASK_UNINTERRUPTIBLE);
 604      schedule();
 605    }
 606    
 607    PRINTD (DBG_CMD, "command complete");
 608    // update queue (... moves the queue along to the next slot)
 609    spin_lock (&cq->lock);
 610    cq->pending--;
 611    // copy command out
 612    *cmd = *ptrs->out;
 613    ptrs->out = NEXTQ (ptrs->out, ptrs->start, ptrs->limit);
 614    spin_unlock (&cq->lock);
 615    
 616    return 0;
 617  } else {
 618    cq->filled++;
 619    spin_unlock (&cq->lock);
 620    return -EAGAIN;
 621  }
 622  
 623}
 624
 625/********** TX queue pair **********/
 626
 627static int tx_give (amb_dev * dev, tx_in * tx) {
 628  amb_txq * txq = &dev->txq;
 629  unsigned long flags;
 630  
 631  PRINTD (DBG_FLOW|DBG_TX, "tx_give %p", dev);
 632
 633  if (test_bit (dead, &dev->flags))
 634    return 0;
 635  
 636  spin_lock_irqsave (&txq->lock, flags);
 637  
 638  if (txq->pending < txq->maximum) {
 639    PRINTD (DBG_TX, "TX in slot %p", txq->in.ptr);
 640
 641    *txq->in.ptr = *tx;
 642    txq->pending++;
 643    txq->in.ptr = NEXTQ (txq->in.ptr, txq->in.start, txq->in.limit);
 644    // hand over the TX and ring the bell
 645    wr_mem (dev, offsetof(amb_mem, mb.adapter.tx_address), virt_to_bus (txq->in.ptr));
 646    wr_mem (dev, offsetof(amb_mem, doorbell), TX_FRAME);
 647    
 648    if (txq->pending > txq->high)
 649      txq->high = txq->pending;
 650    spin_unlock_irqrestore (&txq->lock, flags);
 651    return 0;
 652  } else {
 653    txq->filled++;
 654    spin_unlock_irqrestore (&txq->lock, flags);
 655    return -EAGAIN;
 656  }
 657}
 658
 659static int tx_take (amb_dev * dev) {
 660  amb_txq * txq = &dev->txq;
 661  unsigned long flags;
 662  
 663  PRINTD (DBG_FLOW|DBG_TX, "tx_take %p", dev);
 664  
 665  spin_lock_irqsave (&txq->lock, flags);
 666  
 667  if (txq->pending && txq->out.ptr->handle) {
 668    // deal with TX completion
 669    tx_complete (dev, txq->out.ptr);
 670    // mark unused again
 671    txq->out.ptr->handle = 0;
 672    // remove item
 673    txq->pending--;
 674    txq->out.ptr = NEXTQ (txq->out.ptr, txq->out.start, txq->out.limit);
 675    
 676    spin_unlock_irqrestore (&txq->lock, flags);
 677    return 0;
 678  } else {
 679    
 680    spin_unlock_irqrestore (&txq->lock, flags);
 681    return -1;
 682  }
 683}
 684
 685/********** RX queue pairs **********/
 686
 687static int rx_give (amb_dev * dev, rx_in * rx, unsigned char pool) {
 688  amb_rxq * rxq = &dev->rxq[pool];
 689  unsigned long flags;
 690  
 691  PRINTD (DBG_FLOW|DBG_RX, "rx_give %p[%hu]", dev, pool);
 692  
 693  spin_lock_irqsave (&rxq->lock, flags);
 694  
 695  if (rxq->pending < rxq->maximum) {
 696    PRINTD (DBG_RX, "RX in slot %p", rxq->in.ptr);
 697
 698    *rxq->in.ptr = *rx;
 699    rxq->pending++;
 700    rxq->in.ptr = NEXTQ (rxq->in.ptr, rxq->in.start, rxq->in.limit);
 701    // hand over the RX buffer
 702    wr_mem (dev, offsetof(amb_mem, mb.adapter.rx_address[pool]), virt_to_bus (rxq->in.ptr));
 703    
 704    spin_unlock_irqrestore (&rxq->lock, flags);
 705    return 0;
 706  } else {
 707    spin_unlock_irqrestore (&rxq->lock, flags);
 708    return -1;
 709  }
 710}
 711
 712static int rx_take (amb_dev * dev, unsigned char pool) {
 713  amb_rxq * rxq = &dev->rxq[pool];
 714  unsigned long flags;
 715  
 716  PRINTD (DBG_FLOW|DBG_RX, "rx_take %p[%hu]", dev, pool);
 717  
 718  spin_lock_irqsave (&rxq->lock, flags);
 719  
 720  if (rxq->pending && (rxq->out.ptr->status || rxq->out.ptr->length)) {
 721    // deal with RX completion
 722    rx_complete (dev, rxq->out.ptr);
 723    // mark unused again
 724    rxq->out.ptr->status = 0;
 725    rxq->out.ptr->length = 0;
 726    // remove item
 727    rxq->pending--;
 728    rxq->out.ptr = NEXTQ (rxq->out.ptr, rxq->out.start, rxq->out.limit);
 729    
 730    if (rxq->pending < rxq->low)
 731      rxq->low = rxq->pending;
 732    spin_unlock_irqrestore (&rxq->lock, flags);
 733    return 0;
 734  } else {
 735    if (!rxq->pending && rxq->buffers_wanted)
 736      rxq->emptied++;
 737    spin_unlock_irqrestore (&rxq->lock, flags);
 738    return -1;
 739  }
 740}
 741
 742/********** RX Pool handling **********/
 743
 744/* pre: buffers_wanted = 0, post: pending = 0 */
 745static void drain_rx_pool (amb_dev * dev, unsigned char pool) {
 746  amb_rxq * rxq = &dev->rxq[pool];
 747  
 748  PRINTD (DBG_FLOW|DBG_POOL, "drain_rx_pool %p %hu", dev, pool);
 749  
 750  if (test_bit (dead, &dev->flags))
 751    return;
 752  
 753  /* we are not quite like the fill pool routines as we cannot just
 754     remove one buffer, we have to remove all of them, but we might as
 755     well pretend... */
 756  if (rxq->pending > rxq->buffers_wanted) {
 757    command cmd;
 758    cmd.request = cpu_to_be32 (SRB_FLUSH_BUFFER_Q);
 759    cmd.args.flush.flags = cpu_to_be32 (pool << SRB_POOL_SHIFT);
 760    while (command_do (dev, &cmd))
 761      schedule();
 762    /* the pool may also be emptied via the interrupt handler */
 763    while (rxq->pending > rxq->buffers_wanted)
 764      if (rx_take (dev, pool))
 765        schedule();
 766  }
 767  
 768  return;
 769}
 770
 771static void drain_rx_pools (amb_dev * dev) {
 772  unsigned char pool;
 773  
 774  PRINTD (DBG_FLOW|DBG_POOL, "drain_rx_pools %p", dev);
 775  
 776  for (pool = 0; pool < NUM_RX_POOLS; ++pool)
 777    drain_rx_pool (dev, pool);
 778}
 779
 780static void fill_rx_pool (amb_dev * dev, unsigned char pool,
 781                                 gfp_t priority)
 782{
 783  rx_in rx;
 784  amb_rxq * rxq;
 785  
 786  PRINTD (DBG_FLOW|DBG_POOL, "fill_rx_pool %p %hu %x", dev, pool, priority);
 787  
 788  if (test_bit (dead, &dev->flags))
 789    return;
 790  
 791  rxq = &dev->rxq[pool];
 792  while (rxq->pending < rxq->maximum && rxq->pending < rxq->buffers_wanted) {
 793    
 794    struct sk_buff * skb = alloc_skb (rxq->buffer_size, priority);
 795    if (!skb) {
 796      PRINTD (DBG_SKB|DBG_POOL, "failed to allocate skb for RX pool %hu", pool);
 797      return;
 798    }
 799    if (check_area (skb->data, skb->truesize)) {
 800      dev_kfree_skb_any (skb);
 801      return;
 802    }
 803    // cast needed as there is no %? for pointer differences
 804    PRINTD (DBG_SKB, "allocated skb at %p, head %p, area %li",
 805            skb, skb->head, (long) (skb_end_pointer(skb) - skb->head));
 806    rx.handle = virt_to_bus (skb);
 807    rx.host_address = cpu_to_be32 (virt_to_bus (skb->data));
 808    if (rx_give (dev, &rx, pool))
 809      dev_kfree_skb_any (skb);
 810    
 811  }
 812  
 813  return;
 814}
 815
 816// top up all RX pools (can also be called as a bottom half)
 817static void fill_rx_pools (amb_dev * dev) {
 818  unsigned char pool;
 819  
 820  PRINTD (DBG_FLOW|DBG_POOL, "fill_rx_pools %p", dev);
 821  
 822  for (pool = 0; pool < NUM_RX_POOLS; ++pool)
 823    fill_rx_pool (dev, pool, GFP_ATOMIC);
 824  
 825  return;
 826}
 827
 828/********** enable host interrupts **********/
 829
 830static void interrupts_on (amb_dev * dev) {
 831  wr_plain (dev, offsetof(amb_mem, interrupt_control),
 832            rd_plain (dev, offsetof(amb_mem, interrupt_control))
 833            | AMB_INTERRUPT_BITS);
 834}
 835
 836/********** disable host interrupts **********/
 837
 838static void interrupts_off (amb_dev * dev) {
 839  wr_plain (dev, offsetof(amb_mem, interrupt_control),
 840            rd_plain (dev, offsetof(amb_mem, interrupt_control))
 841            &~ AMB_INTERRUPT_BITS);
 842}
 843
 844/********** interrupt handling **********/
 845
 846static irqreturn_t interrupt_handler(int irq, void *dev_id) {
 847  amb_dev * dev = dev_id;
 848  
 849  PRINTD (DBG_IRQ|DBG_FLOW, "interrupt_handler: %p", dev_id);
 850  
 851  {
 852    u32 interrupt = rd_plain (dev, offsetof(amb_mem, interrupt));
 853  
 854    // for us or someone else sharing the same interrupt
 855    if (!interrupt) {
 856      PRINTD (DBG_IRQ, "irq not for me: %d", irq);
 857      return IRQ_NONE;
 858    }
 859    
 860    // definitely for us
 861    PRINTD (DBG_IRQ, "FYI: interrupt was %08x", interrupt);
 862    wr_plain (dev, offsetof(amb_mem, interrupt), -1);
 863  }
 864  
 865  {
 866    unsigned int irq_work = 0;
 867    unsigned char pool;
 868    for (pool = 0; pool < NUM_RX_POOLS; ++pool)
 869      while (!rx_take (dev, pool))
 870        ++irq_work;
 871    while (!tx_take (dev))
 872      ++irq_work;
 873  
 874    if (irq_work) {
 875#ifdef FILL_RX_POOLS_IN_BH
 876      schedule_work (&dev->bh);
 877#else
 878      fill_rx_pools (dev);
 879#endif
 880
 881      PRINTD (DBG_IRQ, "work done: %u", irq_work);
 882    } else {
 883      PRINTD (DBG_IRQ|DBG_WARN, "no work done");
 884    }
 885  }
 886  
 887  PRINTD (DBG_IRQ|DBG_FLOW, "interrupt_handler done: %p", dev_id);
 888  return IRQ_HANDLED;
 889}
 890
 891/********** make rate (not quite as much fun as Horizon) **********/
 892
 893static int make_rate (unsigned int rate, rounding r,
 894                      u16 * bits, unsigned int * actual) {
 895  unsigned char exp = -1; // hush gcc
 896  unsigned int man = -1;  // hush gcc
 897  
 898  PRINTD (DBG_FLOW|DBG_QOS, "make_rate %u", rate);
 899  
 900  // rates in cells per second, ITU format (nasty 16-bit floating-point)
 901  // given 5-bit e and 9-bit m:
 902  // rate = EITHER (1+m/2^9)*2^e    OR 0
 903  // bits = EITHER 1<<14 | e<<9 | m OR 0
 904  // (bit 15 is "reserved", bit 14 "non-zero")
 905  // smallest rate is 0 (special representation)
 906  // largest rate is (1+511/512)*2^31 = 4290772992 (< 2^32-1)
 907  // smallest non-zero rate is (1+0/512)*2^0 = 1 (> 0)
 908  // simple algorithm:
 909  // find position of top bit, this gives e
 910  // remove top bit and shift (rounding if feeling clever) by 9-e
 911  
 912  // ucode bug: please don't set bit 14! so 0 rate not representable
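  
  // worked example (just the arithmetic above, round_down case):
  // rate = 100000 cells/s: top bit is bit 16, so e = 16
  // m = (100000 - 2^16) * 2^9 / 2^16 = 269 (truncated)
  // bits = e<<9 | m = 0x210d; actual = 2^16 + 269*2^7 = 99968
  // (round_up would instead give m = 270, actual = 100096)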
 913  
 914  if (rate > 0xffc00000U) {
 915    // larger than largest representable rate
 916    
 917    if (r == round_up) {
 918        return -EINVAL;
 919    } else {
 920      exp = 31;
 921      man = 511;
 922    }
 923    
 924  } else if (rate) {
 925    // representable rate
 926    
 927    exp = 31;
 928    man = rate;
 929    
 930    // invariant: rate = man*2^(exp-31)
 931    while (!(man & (1<<31))) {
 932      exp = exp - 1;
 933      man = man<<1;
 934    }
 935    
 936    // man has top bit set
 937    // rate = (2^31+(man-2^31))*2^(exp-31)
 938    // rate = (1+(man-2^31)/2^31)*2^exp
 939    man = man<<1;
 940    man &= 0xffffffffU; // a nop on 32-bit systems
 941    // rate = (1+man/2^32)*2^exp
 942    
 943    // exp is in the range 0 to 31, man is in the range 0 to 2^32-1
 944    // time to lose significance... we want m in the range 0 to 2^9-1
 945    // rounding presents a minor problem... we first decide which way
 946    // we are rounding (based on given rounding direction and possibly
 947    // the bits of the mantissa that are to be discarded).
 948    
 949    switch (r) {
 950      case round_down: {
 951        // just truncate
 952        man = man>>(32-9);
 953        break;
 954      }
 955      case round_up: {
 956        // check all bits that we are discarding
 957        if (man & (~0U>>9)) {
 958          man = (man>>(32-9)) + 1;
 959          if (man == (1<<9)) {
 960            // no need to check for round up outside of range
 961            man = 0;
 962            exp += 1;
 963          }
 964        } else {
 965          man = (man>>(32-9));
 966        }
 967        break;
 968      }
 969      case round_nearest: {
 970        // check msb that we are discarding
 971        if (man & (1<<(32-9-1))) {
 972          man = (man>>(32-9)) + 1;
 973          if (man == (1<<9)) {
 974            // no need to check for round up outside of range
 975            man = 0;
 976            exp += 1;
 977          }
 978        } else {
 979          man = (man>>(32-9));
 980        }
 981        break;
 982      }
 983    }
 984    
 985  } else {
 986    // zero rate - not representable
 987    
 988    if (r == round_down) {
 989      return -EINVAL;
 990    } else {
 991      exp = 0;
 992      man = 0;
 993    }
 994    
 995  }
 996  
 997  PRINTD (DBG_QOS, "rate: man=%u, exp=%hu", man, exp);
 998  
 999  if (bits)
1000    *bits = /* (1<<14) | */ (exp<<9) | man;
1001  
1002  if (actual)
1003    *actual = (exp >= 9)
1004      ? (1 << exp) + (man << (exp-9))
1005      : (1 << exp) + ((man + (1<<(9-exp-1))) >> (9-exp));
1006  
1007  return 0;
1008}
1009
1010/********** Linux ATM Operations **********/
1011
1012// some are not yet implemented while others do not make sense for
1013// this device
1014
1015/********** Open a VC **********/
1016
1017static int amb_open (struct atm_vcc * atm_vcc)
1018{
1019  int error;
1020  
1021  struct atm_qos * qos;
1022  struct atm_trafprm * txtp;
1023  struct atm_trafprm * rxtp;
1024  u16 tx_rate_bits = -1; // hush gcc
1025  u16 tx_vc_bits = -1; // hush gcc
1026  u16 tx_frame_bits = -1; // hush gcc
1027  
1028  amb_dev * dev = AMB_DEV(atm_vcc->dev);
1029  amb_vcc * vcc;
1030  unsigned char pool = -1; // hush gcc
1031  short vpi = atm_vcc->vpi;
1032  int vci = atm_vcc->vci;
1033  
1034  PRINTD (DBG_FLOW|DBG_VCC, "amb_open %x %x", vpi, vci);
1035  
1036#ifdef ATM_VPI_UNSPEC
1037  // UNSPEC is deprecated, remove this code eventually
1038  if (vpi == ATM_VPI_UNSPEC || vci == ATM_VCI_UNSPEC) {
1039    PRINTK (KERN_WARNING, "rejecting open with unspecified VPI/VCI (deprecated)");
1040    return -EINVAL;
1041  }
1042#endif
1043  
1044  if (!(0 <= vpi && vpi < (1<<NUM_VPI_BITS) &&
1045        0 <= vci && vci < (1<<NUM_VCI_BITS))) {
1046    PRINTD (DBG_WARN|DBG_VCC, "VPI/VCI out of range: %hd/%d", vpi, vci);
1047    return -EINVAL;
1048  }
1049  
1050  qos = &atm_vcc->qos;
1051  
1052  if (qos->aal != ATM_AAL5) {
1053    PRINTD (DBG_QOS, "AAL not supported");
1054    return -EINVAL;
1055  }
1056  
1057  // traffic parameters
1058  
1059  PRINTD (DBG_QOS, "TX:");
1060  txtp = &qos->txtp;
1061  if (txtp->traffic_class != ATM_NONE) {
1062    switch (txtp->traffic_class) {
1063      case ATM_UBR: {
1064        // we take "the PCR" as a rate-cap
1065        int pcr = atm_pcr_goal (txtp);
1066        if (!pcr) {
1067          // no rate cap
1068          tx_rate_bits = 0;
1069          tx_vc_bits = TX_UBR;
1070          tx_frame_bits = TX_FRAME_NOTCAP;
1071        } else {
1072          rounding r;
1073          if (pcr < 0) {
1074            r = round_down;
1075            pcr = -pcr;
1076          } else {
1077            r = round_up;
1078          }
1079          error = make_rate (pcr, r, &tx_rate_bits, NULL);
1080          if (error)
1081            return error;
1082          tx_vc_bits = TX_UBR_CAPPED;
1083          tx_frame_bits = TX_FRAME_CAPPED;
1084        }
1085        break;
1086      }
1087#if 0
1088      case ATM_ABR: {
1089        pcr = atm_pcr_goal (txtp);
1090        PRINTD (DBG_QOS, "pcr goal = %d", pcr);
1091        break;
1092      }
1093#endif
1094      default: {
1095        // PRINTD (DBG_QOS, "request for non-UBR/ABR denied");
1096        PRINTD (DBG_QOS, "request for non-UBR denied");
1097        return -EINVAL;
1098      }
1099    }
1100    PRINTD (DBG_QOS, "tx_rate_bits=%hx, tx_vc_bits=%hx",
1101            tx_rate_bits, tx_vc_bits);
1102  }
1103  
1104  PRINTD (DBG_QOS, "RX:");
1105  rxtp = &qos->rxtp;
1106  if (rxtp->traffic_class == ATM_NONE) {
1107    // do nothing
1108  } else {
1109    // choose an RX pool (arranged in increasing size)
1110    for (pool = 0; pool < NUM_RX_POOLS; ++pool)
1111      if ((unsigned int) rxtp->max_sdu <= dev->rxq[pool].buffer_size) {
1112        PRINTD (DBG_VCC|DBG_QOS|DBG_POOL, "chose pool %hu (max_sdu %u <= %u)",
1113                pool, rxtp->max_sdu, dev->rxq[pool].buffer_size);
1114        break;
1115      }
1116    if (pool == NUM_RX_POOLS) {
1117      PRINTD (DBG_WARN|DBG_VCC|DBG_QOS|DBG_POOL,
1118              "no pool suitable for VC (RX max_sdu %d is too large)",
1119              rxtp->max_sdu);
1120      return -EINVAL;
1121    }
1122    
1123    switch (rxtp->traffic_class) {
1124      case ATM_UBR: {
1125        break;
1126      }
1127#if 0
1128      case ATM_ABR: {
1129        pcr = atm_pcr_goal (rxtp);
1130        PRINTD (DBG_QOS, "pcr goal = %d", pcr);
1131        break;
1132      }
1133#endif
1134      default: {
1135        // PRINTD (DBG_QOS, "request for non-UBR/ABR denied");
1136        PRINTD (DBG_QOS, "request for non-UBR denied");
1137        return -EINVAL;
1138      }
1139    }
1140  }
1141  
1142  // get space for our vcc stuff
1143  vcc = kmalloc (sizeof(amb_vcc), GFP_KERNEL);
1144  if (!vcc) {
1145    PRINTK (KERN_ERR, "out of memory!");
1146    return -ENOMEM;
1147  }
1148  atm_vcc->dev_data = (void *) vcc;
1149  
1150  // no failures beyond this point
1151  
1152  // we are not really "immediately before allocating the connection
1153  // identifier in hardware", but it will just have to do!
1154  set_bit(ATM_VF_ADDR,&atm_vcc->flags);
1155  
1156  if (txtp->traffic_class != ATM_NONE) {
1157    command cmd;
1158    
1159    vcc->tx_frame_bits = tx_frame_bits;
1160    
1161    mutex_lock(&dev->vcc_sf);
1162    if (dev->rxer[vci]) {
1163      // RXer on the channel already, just modify rate...
1164      cmd.request = cpu_to_be32 (SRB_MODIFY_VC_RATE);
1165      cmd.args.modify_rate.vc = cpu_to_be32 (vci);  // vpi 0
1166      cmd.args.modify_rate.rate = cpu_to_be32 (tx_rate_bits << SRB_RATE_SHIFT);
1167      while (command_do (dev, &cmd))
1168        schedule();
1169      // ... and TX flags, preserving the RX pool
1170      cmd.request = cpu_to_be32 (SRB_MODIFY_VC_FLAGS);
1171      cmd.args.modify_flags.vc = cpu_to_be32 (vci);  // vpi 0
1172      cmd.args.modify_flags.flags = cpu_to_be32
1173        ( (AMB_VCC(dev->rxer[vci])->rx_info.pool << SRB_POOL_SHIFT)
1174          | (tx_vc_bits << SRB_FLAGS_SHIFT) );
1175      while (command_do (dev, &cmd))
1176        schedule();
1177    } else {
1178      // no RXer on the channel, just open (with pool zero)
1179      cmd.request = cpu_to_be32 (SRB_OPEN_VC);
1180      cmd.args.open.vc = cpu_to_be32 (vci);  // vpi 0
1181      cmd.args.open.flags = cpu_to_be32 (tx_vc_bits << SRB_FLAGS_SHIFT);
1182      cmd.args.open.rate = cpu_to_be32 (tx_rate_bits << SRB_RATE_SHIFT);
1183      while (command_do (dev, &cmd))
1184        schedule();
1185    }
1186    dev->txer[vci].tx_present = 1;
1187    mutex_unlock(&dev->vcc_sf);
1188  }
1189  
1190  if (rxtp->traffic_class != ATM_NONE) {
1191    command cmd;
1192    
1193    vcc->rx_info.pool = pool;
1194    
1195    mutex_lock(&dev->vcc_sf);
1196    /* grow RX buffer pool */
1197    if (!dev->rxq[pool].buffers_wanted)
1198      dev->rxq[pool].buffers_wanted = rx_lats;
1199    dev->rxq[pool].buffers_wanted += 1;
1200    fill_rx_pool (dev, pool, GFP_KERNEL);
1201    
1202    if (dev->txer[vci].tx_present) {
1203      // TXer on the channel already
1204      // switch (from pool zero) to this pool, preserving the TX bits
1205      cmd.request = cpu_to_be32 (SRB_MODIFY_VC_FLAGS);
1206      cmd.args.modify_flags.vc = cpu_to_be32 (vci);  // vpi 0
1207      cmd.args.modify_flags.flags = cpu_to_be32
1208        ( (pool << SRB_POOL_SHIFT)
1209          | (dev->txer[vci].tx_vc_bits << SRB_FLAGS_SHIFT) );
1210    } else {
1211      // no TXer on the channel, open the VC (with no rate info)
1212      cmd.request = cpu_to_be32 (SRB_OPEN_VC);
1213      cmd.args.open.vc = cpu_to_be32 (vci);  // vpi 0
1214      cmd.args.open.flags = cpu_to_be32 (pool << SRB_POOL_SHIFT);
1215      cmd.args.open.rate = cpu_to_be32 (0);
1216    }
1217    while (command_do (dev, &cmd))
1218      schedule();
1219    // this link allows RX frames through
1220    dev->rxer[vci] = atm_vcc;
1221    mutex_unlock(&dev->vcc_sf);
1222  }
1223  
1224  // indicate readiness
1225  set_bit(ATM_VF_READY,&atm_vcc->flags);
1226  
1227  return 0;
1228}
1229
1230/********** Close a VC **********/
1231
1232static void amb_close (struct atm_vcc * atm_vcc) {
1233  amb_dev * dev = AMB_DEV (atm_vcc->dev);
1234  amb_vcc * vcc = AMB_VCC (atm_vcc);
1235  u16 vci = atm_vcc->vci;
1236  
1237  PRINTD (DBG_VCC|DBG_FLOW, "amb_close");
1238  
1239  // indicate unreadiness
1240  clear_bit(ATM_VF_READY,&atm_vcc->flags);
1241  
1242  // disable TXing
1243  if (atm_vcc->qos.txtp.traffic_class != ATM_NONE) {
1244    command cmd;
1245    
1246    mutex_lock(&dev->vcc_sf);
1247    if (dev->rxer[vci]) {
1248      // RXer still on the channel, just modify rate... XXX not really needed
1249      cmd.request = cpu_to_be32 (SRB_MODIFY_VC_RATE);
1250      cmd.args.modify_rate.vc = cpu_to_be32 (vci);  // vpi 0
1251      cmd.args.modify_rate.rate = cpu_to_be32 (0);
1252      // ... and clear TX rate flags (XXX to stop RM cell output?), preserving RX pool
1253    } else {
1254      // no RXer on the channel, close channel
1255      cmd.request = cpu_to_be32 (SRB_CLOSE_VC);
1256      cmd.args.close.vc = cpu_to_be32 (vci); // vpi 0
1257    }
1258    dev->txer[vci].tx_present = 0;
1259    while (command_do (dev, &cmd))
1260      schedule();
1261    mutex_unlock(&dev->vcc_sf);
1262  }
1263  
1264  // disable RXing
1265  if (atm_vcc->qos.rxtp.traffic_class != ATM_NONE) {
1266    command cmd;
1267    
1268    // this is (the?) one reason why we need the amb_vcc struct
1269    unsigned char pool = vcc->rx_info.pool;
1270    
1271    mutex_lock(&dev->vcc_sf);
1272    if (dev->txer[vci].tx_present) {
1273      // TXer still on the channel, just go to pool zero XXX not really needed
1274      cmd.request = cpu_to_be32 (SRB_MODIFY_VC_FLAGS);
1275      cmd.args.modify_flags.vc = cpu_to_be32 (vci);  // vpi 0
1276      cmd.args.modify_flags.flags = cpu_to_be32
1277        (dev->txer[vci].tx_vc_bits << SRB_FLAGS_SHIFT);
1278    } else {
1279      // no TXer on the channel, close the VC
1280      cmd.request = cpu_to_be32 (SRB_CLOSE_VC);
1281      cmd.args.close.vc = cpu_to_be32 (vci); // vpi 0
1282    }
1283    // forget the rxer - no more skbs will be pushed
1284    if (atm_vcc != dev->rxer[vci])
1285      PRINTK (KERN_ERR, "%s vcc=%p rxer[vci]=%p",
1286              "arghhh! we're going to die!",
1287              vcc, dev->rxer[vci]);
1288    dev->rxer[vci] = NULL;
1289    while (command_do (dev, &cmd))
1290      schedule();
1291    
1292    /* shrink RX buffer pool */
1293    dev->rxq[pool].buffers_wanted -= 1;
1294    if (dev->rxq[pool].buffers_wanted == rx_lats) {
1295      dev->rxq[pool].buffers_wanted = 0;
1296      drain_rx_pool (dev, pool);
1297    }
1298    mutex_unlock(&dev->vcc_sf);
1299  }
1300  
1301  // free our structure
1302  kfree (vcc);
1303  
1304  // say the VPI/VCI is free again
1305  clear_bit(ATM_VF_ADDR,&atm_vcc->flags);
1306
1307  return;
1308}
1309
1310/********** Send **********/
1311
1312static int amb_send (struct atm_vcc * atm_vcc, struct sk_buff * skb) {
1313  amb_dev * dev = AMB_DEV(atm_vcc->dev);
1314  amb_vcc * vcc = AMB_VCC(atm_vcc);
1315  u16 vc = atm_vcc->vci;
1316  unsigned int tx_len = skb->len;
1317  unsigned char * tx_data = skb->data;
1318  tx_simple * tx_descr;
1319  tx_in tx;
1320  
1321  if (test_bit (dead, &dev->flags))
1322    return -EIO;
1323  
1324  PRINTD (DBG_FLOW|DBG_TX, "amb_send vc %x data %p len %u",
1325          vc, tx_data, tx_len);
1326  
1327  dump_skb (">>>", vc, skb);
1328  
1329  if (!dev->txer[vc].tx_present) {
1330    PRINTK (KERN_ERR, "attempt to send on RX-only VC %x", vc);
1331    return -EBADFD;
1332  }
1333  
1334  // this is a driver private field so we have to set it ourselves,
1335  // despite the fact that we are _required_ to use it to check for a
1336  // pop function
1337  ATM_SKB(skb)->vcc = atm_vcc;
1338  
1339  if (skb->len > (size_t) atm_vcc->qos.txtp.max_sdu) {
1340    PRINTK (KERN_ERR, "sk_buff length greater than agreed max_sdu, dropping...");
1341    return -EIO;
1342  }
1343  
1344  if (check_area (skb->data, skb->len)) {
1345    atomic_inc(&atm_vcc->stats->tx_err);
1346    return -ENOMEM; // ?
1347  }
1348  
1349  // allocate memory for fragments
1350  tx_descr = kmalloc (sizeof(tx_simple), GFP_KERNEL);
1351  if (!tx_descr) {
1352    PRINTK (KERN_ERR, "could not allocate TX descriptor");
1353    return -ENOMEM;
1354  }
1355  if (check_area (tx_descr, sizeof(tx_simple))) {
1356    kfree (tx_descr);
1357    return -ENOMEM;
1358  }
1359  PRINTD (DBG_TX, "fragment list allocated at %p", tx_descr);
1360  
1361  tx_descr->skb = skb;
1362  
1363  tx_descr->tx_frag.bytes = cpu_to_be32 (tx_len);
1364  tx_descr->tx_frag.address = cpu_to_be32 (virt_to_bus (tx_data));
1365  
1366  tx_descr->tx_frag_end.handle = virt_to_bus (tx_descr);
1367  tx_descr->tx_frag_end.vc = 0;
1368  tx_descr->tx_frag_end.next_descriptor_length = 0;
1369  tx_descr->tx_frag_end.next_descriptor = 0;
1370#ifdef AMB_NEW_MICROCODE
1371  tx_descr->tx_frag_end.cpcs_uu = 0;
1372  tx_descr->tx_frag_end.cpi = 0;
1373  tx_descr->tx_frag_end.pad = 0;
1374#endif
1375  
1376  tx.vc = cpu_to_be16 (vcc->tx_frame_bits | vc);
1377  tx.tx_descr_length = cpu_to_be16 (sizeof(tx_frag)+sizeof(tx_frag_end));
1378  tx.tx_descr_addr = cpu_to_be32 (virt_to_bus (&tx_descr->tx_frag));
1379  
1380  while (tx_give (dev, &tx))
1381    schedule();
1382  return 0;
1383}
1384
1385/********** Change QoS on a VC **********/
1386
1387// int amb_change_qos (struct atm_vcc * atm_vcc, struct atm_qos * qos, int flags);
1388
1389/********** Free RX Socket Buffer **********/
1390
1391#if 0
1392static void amb_free_rx_skb (struct atm_vcc * atm_vcc, struct sk_buff * skb) {
1393  amb_dev * dev = AMB_DEV (atm_vcc->dev);
1394  amb_vcc * vcc = AMB_VCC (atm_vcc);
1395  unsigned char pool = vcc->rx_info.pool;
1396  rx_in rx;
1397  
1398  // This may be unsafe for various reasons that I cannot really guess
1399  // at. However, I note that the ATM layer calls kfree_skb rather
 1400  // than dev_kfree_skb at this point so we are at least covered as far
1401  // as buffer locking goes. There may be bugs if pcap clones RX skbs.
1402
1403  PRINTD (DBG_FLOW|DBG_SKB, "amb_rx_free skb %p (atm_vcc %p, vcc %p)",
1404          skb, atm_vcc, vcc);
1405  
1406  rx.handle = virt_to_bus (skb);
1407  rx.host_address = cpu_to_be32 (virt_to_bus (skb->data));
1408  
1409  skb->data = skb->head;
1410  skb->tail = skb->head;
1411  skb->len = 0;
1412  
1413  if (!rx_give (dev, &rx, pool)) {
1414    // success
1415    PRINTD (DBG_SKB|DBG_POOL, "recycled skb for pool %hu", pool);
1416    return;
1417  }
1418  
1419  // just do what the ATM layer would have done
1420  dev_kfree_skb_any (skb);
1421  
1422  return;
1423}
1424#endif
1425
1426/********** Proc File Output **********/
1427
1428static int amb_proc_read (struct atm_dev * atm_dev, loff_t * pos, char * page) {
1429  amb_dev * dev = AMB_DEV (atm_dev);
1430  int left = *pos;
1431  unsigned char pool;
1432  
1433  PRINTD (DBG_FLOW, "amb_proc_read");
1434  
1435  /* more diagnostics here? */
1436  
1437  if (!left--) {
1438    amb_stats * s = &dev->stats;
1439    return sprintf (page,
1440                    "frames: TX OK %lu, RX OK %lu, RX bad %lu "
1441                    "(CRC %lu, long %lu, aborted %lu, unused %lu).\n",
1442                    s->tx_ok, s->rx.ok, s->rx.error,
1443                    s->rx.badcrc, s->rx.toolong,
1444                    s->rx.aborted, s->rx.unused);
1445  }
1446  
1447  if (!left--) {
1448    amb_cq * c = &dev->cq;
1449    return sprintf (page, "cmd queue [cur/hi/max]: %u/%u/%u. ",
1450                    c->pending, c->high, c->maximum);
1451  }
1452  
1453  if (!left--) {
1454    amb_txq * t = &dev->txq;
1455    return sprintf (page, "TX queue [cur/max high full]: %u/%u %u %u.\n",
1456                    t->pending, t->maximum, t->high, t->filled);
1457  }
1458  
1459  if (!left--) {
1460    unsigned int count = sprintf (page, "RX queues [cur/max/req low empty]:");
1461    for (pool = 0; pool < NUM_RX_POOLS; ++pool) {
1462      amb_rxq * r = &dev->rxq[pool];
1463      count += sprintf (page+count, " %u/%u/%u %u %u",
1464                        r->pending, r->maximum, r->buffers_wanted, r->low, r->emptied);
1465    }
1466    count += sprintf (page+count, ".\n");
1467    return count;
1468  }
1469  
1470  if (!left--) {
1471    unsigned int count = sprintf (page, "RX buffer sizes:");
1472    for (pool = 0; pool < NUM_RX_POOLS; ++pool) {
1473      amb_rxq * r = &dev->rxq[pool];
1474      count += sprintf (page+count, " %u", r->buffer_size);
1475    }
1476    count += sprintf (page+count, ".\n");
1477    return count;
1478  }
1479  
1480#if 0
1481  if (!left--) {
1482    // suni block etc?
1483  }
1484#endif
1485  
1486  return 0;
1487}
1488
1489/********** Operation Structure **********/
1490
1491static const struct atmdev_ops amb_ops = {
1492  .open         = amb_open,
1493  .close        = amb_close,
1494  .send         = amb_send,
1495  .proc_read    = amb_proc_read,
1496  .owner        = THIS_MODULE,
1497};
1498
1499/********** housekeeping **********/
1500static void do_housekeeping (unsigned long arg) {
1501  amb_dev * dev = (amb_dev *) arg;
1502  
1503  // could collect device-specific (not driver/atm-linux) stats here
1504      
1505  // last resort refill once every ten seconds
1506  fill_rx_pools (dev);
1507  mod_timer(&dev->housekeeping, jiffies + 10*HZ);
1508  
1509  return;
1510}
1511
1512/********** creation of communication queues **********/
1513
1514static int __devinit create_queues (amb_dev * dev, unsigned int cmds,
1515                                 unsigned int txs, unsigned int * rxs,
1516                                 unsigned int * rx_buffer_sizes) {
1517  unsigned char pool;
1518  size_t total = 0;
1519  void * memory;
1520  void * limit;
1521  
1522  PRINTD (DBG_FLOW, "create_queues %p", dev);
1523  
1524  total += cmds * sizeof(command);
1525  
1526  total += txs * (sizeof(tx_in) + sizeof(tx_out));
1527  
1528  for (pool = 0; pool < NUM_RX_POOLS; ++pool)
1529    total += rxs[pool] * (sizeof(rx_in) + sizeof(rx_out));
1530  
1531  memory = kmalloc (total, GFP_KERNEL);
1532  if (!memory) {
1533    PRINTK (KERN_ERR, "could not allocate queues");
1534    return -ENOMEM;
1535  }
1536  if (check_area (memory, total)) {
1537    PRINTK (KERN_ERR, "queues allocated in nasty area");
1538    kfree (memory);
1539    return -ENOMEM;
1540  }
1541  
1542  limit = memory + total;
1543  PRINTD (DBG_INIT, "queues from %p to %p", memory, limit);
1544  
1545  PRINTD (DBG_CMD, "command queue at %p", memory);
1546  
1547  {
1548    command * cmd = memory;
1549    amb_cq * cq = &dev->cq;
1550    
1551    cq->pending = 0;
1552    cq->high = 0;
1553    cq->maximum = cmds - 1;
1554    
1555    cq->ptrs.start = cmd;
1556    cq->ptrs.in = cmd;
1557    cq->ptrs.out = cmd;
1558    cq->ptrs.limit = cmd + cmds;
1559    
1560    memory = cq->ptrs.limit;
1561  }
1562  
1563  PRINTD (DBG_TX, "TX queue pair at %p", memory);
1564  
1565  {
1566    tx_in * in = memory;
1567    tx_out * out;
1568    amb_txq * txq = &dev->txq;
1569    
1570    txq->pending = 0;
1571    txq->high = 0;
1572    txq->filled = 0;
1573    txq->maximum = txs - 1;
1574    
1575    txq->in.start = in;
1576    txq->in.ptr = in;
1577    txq->in.limit = in + txs;
1578    
1579    memory = txq->in.limit;
1580    out = memory;
1581    
1582    txq->out.start = out;
1583    txq->out.ptr = out;
1584    txq->out.limit = out + txs;
1585    
1586    memory = txq->out.limit;
1587  }
1588  
1589  PRINTD (DBG_RX, "RX queue pairs at %p", memory);
1590  
1591  for (pool = 0; pool < NUM_RX_POOLS; ++pool) {
1592    rx_in * in = memory;
1593    rx_out * out;
1594    amb_rxq * rxq = &dev->rxq[pool];
1595    
1596    rxq->buffer_size = rx_buffer_sizes[pool];
1597    rxq->buffers_wanted = 0;
1598    
1599    rxq->pending = 0;
1600    rxq->low = rxs[pool] - 1;
1601    rxq->emptied = 0;
1602    rxq->maximum = rxs[pool] - 1;
1603    
1604    rxq->in.start = in;
1605    rxq->in.ptr = in;
1606    rxq->in.limit = in + rxs[pool];
1607    
1608    memory = rxq->in.limit;
1609    out = memory;
1610    
1611    rxq->out.start = out;
1612    rxq->out.ptr = out;
1613    rxq->out.limit = out + rxs[pool];
1614    
1615    memory = rxq->out.limit;
1616  }
1617  
1618  if (memory == limit) {
1619    return 0;
1620  } else {
1621    PRINTK (KERN_ERR, "bad queue alloc %p != %p (tell maintainer)", memory, limit);
1622    kfree (limit - total);
1623    return -ENOMEM;
1624  }
1625  
1626}
1627
1628/********** destruction of communication queues **********/
1629
1630static void destroy_queues (amb_dev * dev) {
1631  // all queues assumed empty
1632  void * memory = dev->cq.ptrs.start;
1633  // includes txq.in, txq.out, rxq[].in and rxq[].out
1634  
1635  PRINTD (DBG_FLOW, "destroy_queues %p", dev);
1636  
1637  PRINTD (DBG_INIT, "freeing queues at %p", memory);
1638  kfree (memory);
1639  
1640  return;
1641}
1642
1643/********** basic loader commands and error handling **********/
1644// centisecond timeouts - guessing away here
1645static unsigned int command_timeouts [] = {
1646        [host_memory_test]     = 15,
1647        [read_adapter_memory]  = 2,
1648        [write_adapter_memory] = 2,
1649        [adapter_start]        = 50,
1650        [get_version_number]   = 10,
1651        [interrupt_host]       = 1,
1652        [flash_erase_sector]   = 1,
1653        [adap_download_block]  = 1,
1654        [adap_erase_flash]     = 1,
1655        [adap_run_in_iram]     = 1,
1656        [adap_end_download]    = 1
1657};
1658
1659
1660static unsigned int command_successes [] = {
1661        [host_memory_test]     = COMMAND_PASSED_TEST,
1662        [read_adapter_memory]  = COMMAND_READ_DATA_OK,
1663        [write_adapter_memory] = COMMAND_WRITE_DATA_OK,
1664        [adapter_start]        = COMMAND_COMPLETE,
1665        [get_version_number]   = COMMAND_COMPLETE,
1666        [interrupt_host]       = COMMAND_COMPLETE,
1667        [flash_erase_sector]   = COMMAND_COMPLETE,
1668        [adap_download_block]  = COMMAND_COMPLETE,
1669        [adap_erase_flash]     = COMMAND_COMPLETE,
1670        [adap_run_in_iram]     = COMMAND_COMPLETE,
1671        [adap_end_download]    = COMMAND_COMPLETE
1672};
1673  
1674static  int decode_loader_result (loader_command cmd, u32 result)
1675{
1676        int res;
1677        const char *msg;
1678
1679        if (result == command_successes[cmd])
1680                return 0;
1681
1682        switch (result) {
1683                case BAD_COMMAND:
1684                        res = -EINVAL;
1685                        msg = "bad command";
1686                        break;
1687                case COMMAND_IN_PROGRESS:
1688                        res = -ETIMEDOUT;
1689                        msg = "command in progress";
1690                        break;
1691                case COMMAND_PASSED_TEST:
1692                        res = 0;
1693                        msg = "command passed test";
1694                        break;
1695                case COMMAND_FAILED_TEST:
1696                        res = -EIO;
1697                        msg = "command failed test";
1698                        break;
1699                case COMMAND_READ_DATA_OK:
1700                        res = 0;
1701                        msg = "command read data ok";
1702                        break;
1703                case COMMAND_READ_BAD_ADDRESS:
1704                        res = -EINVAL;
1705                        msg = "command read bad address";
1706                        break;
1707                case COMMAND_WRITE_DATA_OK:
1708                        res = 0;
1709                        msg = "command write data ok";
1710                        break;
1711                case COMMAND_WRITE_BAD_ADDRESS:
1712                        res = -EINVAL;
1713                        msg = "command write bad address";
1714                        break;
1715                case COMMAND_WRITE_FLASH_FAILURE:
1716                        res = -EIO;
1717                        msg = "command write flash failure";
1718                        break;
1719                case COMMAND_COMPLETE:
1720                        res = 0;
1721                        msg = "command complete";
1722                        break;
1723                case COMMAND_FLASH_ERASE_FAILURE:
1724                        res = -EIO;
1725                        msg = "command flash erase failure";
1726                        break;
1727                case COMMAND_WRITE_BAD_DATA:
1728                        res = -EINVAL;
1729                        msg = "command write bad data";
1730                        break;
1731                default:
1732                        res = -EINVAL;
1733                        msg = "unknown error";
1734                        PRINTD (DBG_LOAD|DBG_ERR,
1735                                "decode_loader_result got %d=%x !",
1736                                result, result);
1737                        break;
1738        }
1739
1740        PRINTK (KERN_ERR, "%s", msg);
1741        return res;
1742}
1743
1744static int __devinit do_loader_command (volatile loader_block * lb,
1745                                     const amb_dev * dev, loader_command cmd) {
1746  
1747  unsigned long timeout;
1748  
1749  PRINTD (DBG_FLOW|DBG_LOAD, "do_loader_command");
1750  
1751  /* do a command
1752     
1753     Set the return value to zero, set the command type and set the
1754     valid entry to the right magic value. The payload is already
1755     correctly byte-ordered so we leave it alone. Hit the doorbell
1756     with the bus address of this structure.
1757     
1758  */
1759  
1760  lb->result = 0;
1761  lb->command = cpu_to_be32 (cmd);
1762  lb->valid = cpu_to_be32 (DMA_VALID);
1763  // dump_registers (dev);
1764  // dump_loader_block (lb);
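      // only the offset of lb within its 1Gb-aligned window goes into the
      // doorbell; the window base bits (onegigmask) are set up to match lb
      // by fixup_plx_window() during amb_init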
1765  wr_mem (dev, offsetof(amb_mem, doorbell), virt_to_bus (lb) & ~onegigmask);
1766  
1767  timeout = command_timeouts[cmd] * 10;
1768  
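      // command_timeouts[] is in centiseconds, hence the factor of 10 to get
      // milliseconds; msleep_interruptible() returns the time left if the
      // sleep is interrupted and 0 once it has slept fully, so timeout counts
      // down to zero and the else branch below reports the time-out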
1769  while (!lb->result || lb->result == cpu_to_be32 (COMMAND_IN_PROGRESS))
1770    if (timeout) {
1771      timeout = msleep_interruptible(timeout);
1772    } else {
1773      PRINTD (DBG_LOAD|DBG_ERR, "command %d timed out", cmd);
1774      dump_registers (dev);
1775      dump_loader_block (lb);
1776      return -ETIMEDOUT;
1777    }
1778  
1779  if (cmd == adapter_start) {
1780    // wait for start command to acknowledge...
1781    timeout = 100;
1782    while (rd_plain (dev, offsetof(amb_mem, doorbell)))
1783      if (timeout) {
1784        timeout = msleep_interruptible(timeout);
1785      } else {
1786        PRINTD (DBG_LOAD|DBG_ERR, "start command did not clear doorbell, res=%08x",
1787                be32_to_cpu (lb->result));
1788        dump_registers (dev);
1789        return -ETIMEDOUT;
1790      }
1791    return 0;
1792  } else {
1793    return decode_loader_result (cmd, be32_to_cpu (lb->result));
1794  }
1795  
1796}
1797
1798/* loader: determine loader version */
1799
1800static int __devinit get_loader_version (loader_block * lb,
1801                                      const amb_dev * dev, u32 * version) {
1802  int res;
1803  
1804  PRINTD (DBG_FLOW|DBG_LOAD, "get_loader_version");
1805  
1806  res = do_loader_command (lb, dev, get_version_number);
1807  if (res)
1808    return res;
1809  if (version)
1810    *version = be32_to_cpu (lb->payload.version);
1811  return 0;
1812}
1813
1814/* loader: write memory data blocks */
1815
1816static int __devinit loader_write (loader_block* lb,
1817                                   const amb_dev *dev,
1818                                   const struct ihex_binrec *rec) {
1819  transfer_block * tb = &lb->payload.transfer;
1820  
1821  PRINTD (DBG_FLOW|DBG_LOAD, "loader_write");
1822
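      // the transfer count is in 32-bit words (the ihex record length is in
      // bytes); the record fields are already big-endian, so addr is copied
      // across untouched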
1823  tb->address = rec->addr;
1824  tb->count = cpu_to_be32(be16_to_cpu(rec->len) / 4);
1825  memcpy(tb->data, rec->data, be16_to_cpu(rec->len));
1826  return do_loader_command (lb, dev, write_adapter_memory);
1827}
1828
1829/* loader: verify memory data blocks */
1830
1831static int __devinit loader_verify (loader_block * lb,
1832                                    const amb_dev *dev,
1833                                    const struct ihex_binrec *rec) {
1834  transfer_block * tb = &lb->payload.transfer;
1835  int res;
1836  
1837  PRINTD (DBG_FLOW|DBG_LOAD, "loader_verify");
1838  
1839  tb->address = rec->addr;
1840  tb->count = cpu_to_be32(be16_to_cpu(rec->len) / 4);
1841  res = do_loader_command (lb, dev, read_adapter_memory);
1842  if (!res && memcmp(tb->data, rec->data, be16_to_cpu(rec->len)))
1843    res = -EINVAL;
1844  return res;
1845}
1846
1847/* loader: start microcode */
1848
1849static int __devinit loader_start (loader_block * lb,
1850                                const amb_dev * dev, u32 address) {
1851  PRINTD (DBG_FLOW|DBG_LOAD, "loader_start");
1852  
1853  lb->payload.start = cpu_to_be32 (address);
1854  return do_loader_command (lb, dev, adapter_start);
1855}
1856
1857/********** reset card **********/
1858
1859static inline void sf (const char * msg)
1860{
1861        PRINTK (KERN_ERR, "self-test failed: %s", msg);
1862}
1863
1864static int amb_reset (amb_dev * dev, int diags) {
1865  u32 word;
1866  
1867  PRINTD (DBG_FLOW|DBG_LOAD, "amb_reset");
1868  
1869  word = rd_plain (dev, offsetof(amb_mem, reset_control));
1870  // put card into reset state
1871  wr_plain (dev, offsetof(amb_mem, reset_control), word | AMB_RESET_BITS);
1872  // wait a short while
1873  udelay (10);
1874#if 1
1875  // put card into known good state
1876  wr_plain (dev, offsetof(amb_mem, interrupt_control), AMB_DOORBELL_BITS);
1877  // clear all interrupts just in case
1878  wr_plain (dev, offsetof(amb_mem, interrupt), -1);
1879#endif
1880  // clear self-test done flag
1881  wr_plain (dev, offsetof(amb_mem, mb.loader.ready), 0);
1882  // take card out of reset state
1883  wr_plain (dev, offsetof(amb_mem, reset_control), word &~ AMB_RESET_BITS);
1884  
1885  if (diags) { 
1886    unsigned long timeout;
1887    // 4.2 second wait
1888    msleep(4200);
1889    // half second time-out
1890    timeout = 500;
1891    while (!rd_plain (dev, offsetof(amb_mem, mb.loader.ready)))
1892      if (timeout) {
1893        timeout = msleep_interruptible(timeout);
1894      } else {
1895        PRINTD (DBG_LOAD|DBG_ERR, "reset timed out");
1896        return -ETIMEDOUT;
1897      }
1898    
1899    // get results of self-test
1900    // XXX double check byte-order
1901    word = rd_mem (dev, offsetof(amb_mem, mb.loader.result));
1902    if (word & SELF_TEST_FAILURE) {
1903      if (word & GPINT_TST_FAILURE)
1904        sf ("interrupt");
1905      if (word & SUNI_DATA_PATTERN_FAILURE)
1906        sf ("SUNI data pattern");
1907      if (word & SUNI_DATA_BITS_FAILURE)
1908        sf ("SUNI data bits");
1909      if (word & SUNI_UTOPIA_FAILURE)
1910        sf ("SUNI UTOPIA interface");
1911      if (word & SUNI_FIFO_FAILURE)
1912        sf ("SUNI cell buffer FIFO");
1913      if (word & SRAM_FAILURE)
1914        sf ("bad SRAM");
1915      // better return value?
1916      return -EIO;
1917    }
1918    
1919  }
1920  return 0;
1921}
1922
1923/********** transfer and start the microcode **********/
1924
1925static int __devinit ucode_init (loader_block * lb, amb_dev * dev) {
1926  const struct firmware *fw;
1927  unsigned long start_address;
1928  const struct ihex_binrec *rec;
1929  const char *errmsg = NULL;
1930  int res;
1931
1932  res = request_ihex_firmware(&fw, "atmsar11.fw", &dev->pci_dev->dev);
1933  if (res) {
1934    PRINTK (KERN_ERR, "Cannot load microcode data");
1935    return res;
1936  }
1937
1938  /* First record contains just the start address */
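      // struct ihex_binrec carries a big-endian address and length followed
      // by the payload; the payload of this first record is the (big-endian)
      // address at which to start the microcode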
1939  rec = (const struct ihex_binrec *)fw->data;
1940  if (be16_to_cpu(rec->len) != sizeof(__be32) || be32_to_cpu(rec->addr)) {
1941    errmsg = "no start record";
1942    goto fail;
1943  }
1944  start_address = be32_to_cpup((__be32 *)rec->data);
1945
1946  rec = ihex_next_binrec(rec);
1947
1948  PRINTD (DBG_FLOW|DBG_LOAD, "ucode_init");
1949
1950  while (rec) {
1951    PRINTD (DBG_LOAD, "starting region (%x, %u)", be32_to_cpu(rec->addr),
1952            be16_to_cpu(rec->len));
1953    if (be16_to_cpu(rec->len) > 4 * MAX_TRANSFER_DATA) {
1954            errmsg = "record too long";
1955            goto fail;
1956    }
1957    if (be16_to_cpu(rec->len) & 3) {
1958            errmsg = "odd number of bytes";
1959            goto fail;
1960    }
1961    res = loader_write(lb, dev, rec);
1962    if (res)
1963      break;
1964
1965    res = loader_verify(lb, dev, rec);
1966    if (res)
1967      break;
        rec = ihex_next_binrec(rec);
1968  }
1969  release_firmware(fw);
1970  if (!res)
1971    res = loader_start(lb, dev, start_address);
1972
1973  return res;
1974fail:
1975  release_firmware(fw);
1976  PRINTK(KERN_ERR, "Bad microcode data (%s)", errmsg);
1977  return -EINVAL;
1978}
1979
1980/********** give adapter parameters **********/
1981  
1982static inline __be32 bus_addr(void * addr) {
1983    return cpu_to_be32 (virt_to_bus (addr));
1984}
1985
1986static int __devinit amb_talk (amb_dev * dev) {
1987  adap_talk_block a;
1988  unsigned char pool;
1989  unsigned long timeout;
1990  
1991  PRINTD (DBG_FLOW, "amb_talk %p", dev);
1992  
1993  a.command_start = bus_addr (dev->cq.ptrs.start);
1994  a.command_end   = bus_addr (dev->cq.ptrs.limit);
1995  a.tx_start      = bus_addr (dev->txq.in.start);
1996  a.tx_end        = bus_addr (dev->txq.in.limit);
1997  a.txcom_start   = bus_addr (dev->txq.out.start);
1998  a.txcom_end     = bus_addr (dev->txq.out.limit);
1999  
2000  for (pool = 0; pool < NUM_RX_POOLS; ++pool) {
2001    // the other "a" items are set up by the adapter
2002    a.rec_struct[pool].buffer_start = bus_addr (dev->rxq[pool].in.start);
2003    a.rec_struct[pool].buffer_end   = bus_addr (dev->rxq[pool].in.limit);
2004    a.rec_struct[pool].rx_start     = bus_addr (dev->rxq[pool].out.start);
2005    a.rec_struct[pool].rx_end       = bus_addr (dev->rxq[pool].out.limit);
2006    a.rec_struct[pool].buffer_size = cpu_to_be32 (dev->rxq[pool].buffer_size);
2007  }
2008  
2009#ifdef AMB_NEW_MICROCODE
2010  // disable fast PLX prefetching
2011  a.init_flags = 0;
2012#endif
2013  
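      // "a" lives on this function's stack and is handed to the adapter by
      // bus address, so it must remain valid until the adapter has finished
      // with it - hence the waits below before returning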
2014  // pass the structure
2015  wr_mem (dev, offsetof(amb_mem, doorbell), virt_to_bus (&a));
2016  
2017  // 2.2 second wait (must not touch doorbell during 2 second DMA test)
2018  msleep(2200);
2019  // give the adapter another half second?
2020  timeout = 500;
2021  while (rd_plain (dev, offsetof(amb_mem, doorbell)))
2022    if (timeout) {
2023      timeout = msleep_interruptible(timeout);
2024    } else {
2025      PRINTD (DBG_INIT|DBG_ERR, "adapter init timed out");
2026      return -ETIMEDOUT;
2027    }
2028  
2029  return 0;
2030}
2031
2032// get microcode version
2033static void __devinit amb_ucode_version (amb_dev * dev) {
2034  u32 major;
2035  u32 minor;
2036  command cmd;
2037  cmd.request = cpu_to_be32 (SRB_GET_VERSION);
2038  while (command_do (dev, &cmd)) {
2039    set_current_state(TASK_UNINTERRUPTIBLE);
2040    schedule();
2041  }
2042  major = be32_to_cpu (cmd.args.version.major);
2043  minor = be32_to_cpu (cmd.args.version.minor);
2044  PRINTK (KERN_INFO, "microcode version is %u.%u", major, minor);
2045}
2046  
2047// get end station address
2048static void __devinit amb_esi (amb_dev * dev, u8 * esi) {
2049  u32 lower4;
2050  u16 upper2;
2051  command cmd;
2052  
2053  cmd.request = cpu_to_be32 (SRB_GET_BIA);
2054  while (command_do (dev, &cmd)) {
2055    set_current_state(TASK_UNINTERRUPTIBLE);
2056    schedule();
2057  }
2058  lower4 = be32_to_cpu (cmd.args.bia.lower4);
2059  upper2 = be32_to_cpu (cmd.args.bia.upper2);
2060  PRINTD (DBG_LOAD, "BIA: lower4: %08x, upper2 %04x", lower4, upper2);
2061  
2062  if (esi) {
2063    unsigned int i;
2064    
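        // assemble the 6-byte ESI from the 48-bit BIA; each byte is
        // bit-reversed on the way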
2065    PRINTDB (DBG_INIT, "ESI:");
2066    for (i = 0; i < ESI_LEN; ++i) {
2067      if (i < 4)
2068          esi[i] = bitrev8(lower4>>(8*i));
2069      else
2070          esi[i] = bitrev8(upper2>>(8*(i-4)));
2071      PRINTDM (DBG_INIT, " %02x", esi[i]);
2072    }
2073    
2074    PRINTDE (DBG_INIT, "");
2075  }
2076  
2077  return;
2078}
2079  
2080static void fixup_plx_window (amb_dev *dev, loader_block *lb)
2081{
2082        // fix up the PLX-mapped window base address to match the block
2083        unsigned long blb;
2084        u32 mapreg;
2085        blb = virt_to_bus(lb);
2086        // the kernel stack had better not ever cross a 1Gb boundary!
2087        mapreg = rd_plain (dev, offsetof(amb_mem, stuff[10]));
2088        mapreg &= ~onegigmask;
2089        mapreg |= blb & onegigmask;
2090        wr_plain (dev, offsetof(amb_mem, stuff[10]), mapreg);
2091        return;
2092}
2093
2094static int __devinit amb_init (amb_dev * dev)
2095{
2096  loader_block lb;
2097  
2098  u32 version;
2099  
2100  if (amb_reset (dev, 1)) {
2101    PRINTK (KERN_ERR, "card reset failed!");
2102  } else {
2103    fixup_plx_window (dev, &lb);
2104    
2105    if (get_loader_version (&lb, dev, &version)) {
2106      PRINTK (KERN_INFO, "failed to get loader version");
2107    } else {
2108      PRINTK (KERN_INFO, "loader version is %08x", version);
2109      
2110      if (ucode_init (&lb, dev)) {
2111        PRINTK (KERN_ERR, "microcode failure");
2112      } else if (create_queues (dev, cmds, txs, rxs, rxs_bs)) {
2113        PRINTK (KERN_ERR, "failed to get memory for queues");
2114      } else {
2115        
2116        if (amb_talk (dev)) {
2117          PRINTK (KERN_ERR, "adapter did not accept queues");
2118        } else {
2119          
2120          amb_ucode_version (dev);
2121          return 0;
2122          
2123        } /* amb_talk */
2124        
2125        destroy_queues (dev);
2126      } /* create_queues, ucode_init */
2127      
2128      amb_reset (dev, 0);
2129    } /* get_loader_version */
2130    
2131  } /* amb_reset */
2132  
2133  return -EINVAL;
2134}
2135
2136static void setup_dev(amb_dev *dev, struct pci_dev *pci_dev) 
2137{
2138      unsigned char pool;
2139      
2140      // set up known dev items straight away
2141      dev->pci_dev = pci_dev; 
2142      pci_set_drvdata(pci_dev, dev);
2143      
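          // BAR 1 supplies the card's I/O base, BAR 0 its memory window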
2144      dev->iobase = pci_resource_start (pci_dev, 1);
2145      dev->irq = pci_dev->irq; 
2146      dev->membase = bus_to_virt(pci_resource_start(pci_dev, 0));
2147      
2148      // flags (currently only dead)
2149      dev->flags = 0;
2150      
2151      // Allocate cell rates (fibre)
2152      // ATM_OC3_PCR = 155520000/8/270*260/53 - 29/53
2153      // to be really pedantic, this should be ATM_OC3c_PCR
2154      dev->tx_avail = ATM_OC3_PCR;
2155      dev->rx_avail = ATM_OC3_PCR;
2156      
2157#ifdef FILL_RX_POOLS_IN_BH
2158      // initialise bottom half
2159      INIT_WORK(&dev->bh, (void (*)(void *)) fill_rx_pools, dev);
2160#endif
2161      
2162      // semaphore for txer/rxer modifications - we cannot use a
2163      // spinlock as the critical region needs to switch processes
2164      mutex_init(&dev->vcc_sf);
2165      // queue manipulation spinlocks; we want atomic reads and
2166      // writes to the queue descriptors (handles IRQ and SMP)
2167      // consider replacing "int pending" -> "atomic_t available"
2168      // => problem related to who gets to move queue pointers
2169      spin_lock_init (&dev->cq.lock);
2170      spin_lock_init (&dev->txq.lock);
2171      for (pool = 0; pool < NUM_RX_POOLS; ++pool)
2172        spin_lock_init (&dev->rxq[pool].lock);
2173}
2174
2175static void setup_pci_dev(struct pci_dev *pci_dev)
2176{
2177        unsigned char lat;
2178      
2179        // enable bus master accesses
2180        pci_set_master(pci_dev);
2181
2182        // frobnicate latency (upwards, usually)
2183        pci_read_config_byte (pci_dev, PCI_LATENCY_TIMER, &lat);
2184
2185        if (!pci_lat)
2186                pci_lat = (lat < MIN_PCI_LATENCY) ? MIN_PCI_LATENCY : lat;
2187
2188        if (lat != pci_lat) {
2189                PRINTK (KERN_INFO, "Changing PCI latency timer from %hu to %hu",
2190                        lat, pci_lat);
2191                pci_write_config_byte(pci_dev, PCI_LATENCY_TIMER, pci_lat);
2192        }
2193}
2194
2195static int __devinit amb_probe(struct pci_dev *pci_dev, const struct pci_device_id *pci_ent)
2196{
2197        amb_dev * dev;
2198        int err;
2199        unsigned int irq;
2200      
2201        err = pci_enable_device(pci_dev);
2202        if (err < 0) {
2203                PRINTK (KERN_ERR, "cannot enable PCI device");
2204                goto out;
2205        }
2206
2207        // read resources from PCI configuration space
2208        irq = pci_dev->irq;
2209
2210        if (pci_dev->device == PCI_DEVICE_ID_MADGE_AMBASSADOR_BAD) {
2211                PRINTK (KERN_ERR, "skipped broken (PLX rev 2) card");
2212                err = -EINVAL;
2213                goto out_disable;
2214        }
2215
2216        PRINTD (DBG_INFO, "found Madge ATM adapter (amb) at"
2217                " IO %llx, IRQ %u, MEM %p",
2218                (unsigned long long)pci_resource_start(pci_dev, 1),
2219                irq, bus_to_virt(pci_resource_start(pci_dev, 0)));
2220
2221        // check IO region
2222        err = pci_request_region(pci_dev, 1, DEV_LABEL);
2223        if (err < 0) {
2224                PRINTK (KERN_ERR, "IO range already in use!");
2225                goto out_disable;
2226        }
2227
2228        dev = kzalloc(sizeof(amb_dev), GFP_KERNEL);
2229        if (!dev) {
2230                PRINTK (KERN_ERR, "out of memory!");
2231                err = -ENOMEM;
2232                goto out_release;
2233        }
2234
2235        setup_dev(dev, pci_dev);
2236
2237        err = amb_init(dev);
2238        if (err < 0) {
2239                PRINTK (KERN_ERR, "adapter initialisation failure");
2240                goto out_free;
2241        }
2242
2243        setup_pci_dev(pci_dev);
2244
2245        // grab (but share) IRQ and install handler
2246        err = request_irq(irq, interrupt_handler, IRQF_SHARED, DEV_LABEL, dev);
2247        if (err < 0) {
2248                PRINTK (KERN_ERR, "request IRQ failed!");
2249                goto out_reset;
2250        }
2251
2252        dev->atm_dev = atm_dev_register (DEV_LABEL, &pci_dev->dev, &amb_ops, -1,
2253                                         NULL);
2254        if (!dev->atm_dev) {
2255                PRINTD (DBG_ERR, "failed to register Madge ATM adapter");
2256                err = -EINVAL;
2257                goto out_free_irq;
2258        }
2259
2260        PRINTD (DBG_INFO, "registered Madge ATM adapter (no. %d) (%p) at %p",
2261                dev->atm_dev->number, dev, dev->atm_dev);
2262        dev->atm_dev->dev_data = (void *) dev;
2263
2264        // register our address
2265        amb_esi (dev, dev->atm_dev->esi);
2266
2267        // 0 bits for vpi, 10 bits for vci
2268        dev->atm_dev->ci_range.vpi_bits = NUM_VPI_BITS;
2269        dev->atm_dev->ci_range.vci_bits = NUM_VCI_BITS;
2270
2271        init_timer(&dev->housekeeping);
2272        dev->housekeeping.function = do_housekeeping;
2273        dev->housekeeping.data = (unsigned long) dev;
2274        mod_timer(&dev->housekeeping, jiffies);
2275
2276        // enable host interrupts
2277        interrupts_on (dev);
2278
2279out:
2280        return err;
2281
2282out_free_irq:
2283        free_irq(irq, dev);
2284out_reset:
2285        amb_reset(dev, 0);
2286out_free:
2287        kfree(dev);
2288out_release:
2289        pci_release_region(pci_dev, 1);
2290out_disable:
2291        pci_disable_device(pci_dev);
2292        goto out;
2293}
2294
2295
2296static void __devexit amb_remove_one(struct pci_dev *pci_dev)
2297{
2298        struct amb_dev *dev;
2299
2300        dev = pci_get_drvdata(pci_dev);
2301
2302        PRINTD(DBG_INFO|DBG_INIT, "closing %p (atm_dev = %p)", dev, dev->atm_dev);
2303        del_timer_sync(&dev->housekeeping);
2304        // the drain should not be necessary
2305        drain_rx_pools(dev);
2306        interrupts_off(dev);
2307        amb_reset(dev, 0);
2308        free_irq(dev->irq, dev);
2309        pci_disable_device(pci_dev);
2310        destroy_queues(dev);
2311        atm_dev_deregister(dev->atm_dev);
2312        kfree(dev);
2313        pci_release_region(pci_dev, 1);
2314}
2315
2316static void __init amb_check_args (void) {
2317  unsigned char pool;
2318  unsigned int max_rx_size;
2319  
2320#ifdef DEBUG_AMBASSADOR
2321  PRINTK (KERN_NOTICE, "debug bitmap is %hx", debug &= DBG_MASK);
2322#else
2323  if (debug)
2324    PRINTK (KERN_NOTICE, "no debugging support");
2325#endif
2326  
2327  if (cmds < MIN_QUEUE_SIZE)
2328    PRINTK (KERN_NOTICE, "cmds has been raised to %u",
2329            cmds = MIN_QUEUE_SIZE);
2330  
2331  if (txs < MIN_QUEUE_SIZE)
2332    PRINTK (KERN_NOTICE, "txs has been raised to %u",
2333            txs = MIN_QUEUE_SIZE);
2334  
2335  for (pool = 0; pool < NUM_RX_POOLS; ++pool)
2336    if (rxs[pool] < MIN_QUEUE_SIZE)
2337      PRINTK (KERN_NOTICE, "rxs[%hu] has been raised to %u",
2338              pool, rxs[pool] = MIN_QUEUE_SIZE);
2339  
2340  // buffer sizes should be greater than zero and strictly increasing
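      // (a pool that breaks this rule is only reported here, not disabled)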
2341  max_rx_size = 0;
2342  for (pool = 0; pool < NUM_RX_POOLS; ++pool)
2343    if (rxs_bs[pool] <= max_rx_size)
2344      PRINTK (KERN_NOTICE, "useless pool (rxs_bs[%hu] = %u)",
2345              pool, rxs_bs[pool]);
2346    else
2347      max_rx_size = rxs_bs[pool];
2348  
2349  if (rx_lats < MIN_RX_BUFFERS)
2350    PRINTK (KERN_NOTICE, "rx_lats has been raised to %u",
2351            rx_lats = MIN_RX_BUFFERS);
2352  
2353  return;
2354}
2355
2356/********** module stuff **********/
2357
2358MODULE_AUTHOR(maintainer_string);
2359MODULE_DESCRIPTION(description_string);
2360MODULE_LICENSE("GPL");
2361MODULE_FIRMWARE("atmsar11.fw");
2362module_param(debug,   ushort, 0644);
2363module_param(cmds,    uint, 0);
2364module_param(txs,     uint, 0);
2365module_param_array(rxs,     uint, NULL, 0);
2366module_param_array(rxs_bs,  uint, NULL, 0);
2367module_param(rx_lats, uint, 0);
2368module_param(pci_lat, byte, 0);
2369MODULE_PARM_DESC(debug,   "debug bitmap, see .h file");
2370MODULE_PARM_DESC(cmds,    "number of command queue entries");
2371MODULE_PARM_DESC(txs,     "number of TX queue entries");
2372MODULE_PARM_DESC(rxs,     "number of RX queue entries [" __MODULE_STRING(NUM_RX_POOLS) "]");
2373MODULE_PARM_DESC(rxs_bs,  "size of RX buffers [" __MODULE_STRING(NUM_RX_POOLS) "]");
2374MODULE_PARM_DESC(rx_lats, "number of extra buffers to cope with RX latencies");
2375MODULE_PARM_DESC(pci_lat, "PCI latency in bus cycles");
2376
2377/********** module entry **********/
2378
2379static struct pci_device_id amb_pci_tbl[] = {
2380        { PCI_VDEVICE(MADGE, PCI_DEVICE_ID_MADGE_AMBASSADOR), 0 },
2381        { PCI_VDEVICE(MADGE, PCI_DEVICE_ID_MADGE_AMBASSADOR_BAD), 0 },
2382        { 0, }
2383};
2384
2385MODULE_DEVICE_TABLE(pci, amb_pci_tbl);
2386
2387static struct pci_driver amb_driver = {
2388        .name =         "amb",
2389        .probe =        amb_probe,
2390        .remove =       __devexit_p(amb_remove_one),
2391        .id_table =     amb_pci_tbl,
2392};
2393
2394static int __init amb_module_init (void)
2395{
2396  PRINTD (DBG_FLOW|DBG_INIT, "init_module");
2397  
2398  // sanity check - cast needed as printk does not support %Zu
2399  if (sizeof(amb_mem) != 4*16 + 4*12) {
2400    PRINTK (KERN_ERR, "Fix amb_mem (is %lu bytes).",
2401            (unsigned long) sizeof(amb_mem));
2402    return -ENOMEM;
2403  }
2404  
2405  show_version();
2406  
2407  amb_check_args();
2408  
2409  // get the juice
2410  return pci_register_driver(&amb_driver);
2411}
2412
2413/********** module exit **********/
2414
2415static void __exit amb_module_exit (void)
2416{
2417  PRINTD (DBG_FLOW|DBG_INIT, "cleanup_module");
2418
2419  pci_unregister_driver(&amb_driver);
2420}
2421
2422module_init(amb_module_init);
2423module_exit(amb_module_exit);
2424